Remove mlab.csv2rec, mlab.rec2csv

dstansby · dstansby · commit 6a89da9183ec · 2018-09-19T18:09:43.000+01:00
diff --git a/doc/api/next_api_changes/2018-09-18-DS.rst b/doc/api/next_api_changes/2018-09-18-DS.rst
@@ -21,4 +21,6 @@ in Matplotlib 2.2 has been removed. See below for a list:
 - `mlab.get_sparse_matrix`
 - `mlab.dist` (use numpy.hypot instead)
 - `mlab.dist_point_to_segment`
+- `mlab.csv2rec` (see the numpy.recarray class)
+- `mlab.rec2csv` (see the numpy.recarray class)
 - `mlab.donothing_callback`
diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py
@@ -1923,287 +1923,6 @@ def extract(r):
     return np.rec.fromrecords(results, names=names)
 
 
-@cbook.deprecated("2.2")
-def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
-            converterd=None, names=None, missing='', missingd=None,
-            use_mrecords=False, dayfirst=False, yearfirst=False):
-    """
-    Load data from comma/space/tab delimited file in *fname* into a
-    numpy record array and return the record array.
-
-    If *names* is *None*, a header row is required to automatically
-    assign the recarray names.  The headers will be lower cased,
-    spaces will be converted to underscores, and illegal attribute
-    name characters removed.  If *names* is not *None*, it is a
-    sequence of names to use for the column names.  In this case, it
-    is assumed there is no header row.
-
-
-    - *fname*: can be a filename or a file handle.  Support for gzipped
-      files is automatic, if the filename ends in '.gz'
-
-    - *comments*: the character used to indicate the start of a comment
-      in the file, or *None* to switch off the removal of comments
-
-    - *skiprows*: is the number of rows from the top to skip
-
-    - *checkrows*: is the number of rows to check to validate the column
-      data type.  When set to zero all rows are validated.
-
-    - *converterd*: if not *None*, is a dictionary mapping column number or
-      munged column name to a converter function.
-
-    - *names*: if not None, is a list of header names.  In this case, no
-      header will be read from the file
-
-    - *missingd* is a dictionary mapping munged column names to field values
-      which signify that the field does not contain actual data and should
-      be masked, e.g., '0000-00-00' or 'unused'
-
-    - *missing*: a string whose value signals a missing field regardless of
-      the column it appears in
-
-    - *use_mrecords*: if True, return an mrecords.fromrecords record array if
-      any of the data are missing
-
-    - *dayfirst*: default is False so that MM-DD-YY has precedence over
-      DD-MM-YY.  See
-      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
-      for further information.
-
-    - *yearfirst*: default is False so that MM-DD-YY has precedence over
-      YY-MM-DD. See
-      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
-      for further information.
-
-      If no rows are found, *None* is returned
-    """
-
-    if converterd is None:
-        converterd = dict()
-
-    if missingd is None:
-        missingd = {}
-
-    import dateutil.parser
-    import datetime
-
-    fh = cbook.to_filehandle(fname)
-
-    delimiter = str(delimiter)
-
-    class FH:
-        """
-        For space-delimited files, we want different behavior than
-        comma or tab.  Generally, we want multiple spaces to be
-        treated as a single separator, whereas with comma and tab we
-        want multiple commas to return multiple (empty) fields.  The
-        join/strip trick below effects this.
-        """
-        def __init__(self, fh):
-            self.fh = fh
-
-        def close(self):
-            self.fh.close()
-
-        def seek(self, arg):
-            self.fh.seek(arg)
-
-        def fix(self, s):
-            return ' '.join(s.split())
-
-        def __next__(self):
-            return self.fix(next(self.fh))
-
-        def __iter__(self):
-            for line in self.fh:
-                yield self.fix(line)
-
-    if delimiter == ' ':
-        fh = FH(fh)
-
-    reader = csv.reader(fh, delimiter=delimiter)
-
-    def process_skiprows(reader):
-        if skiprows:
-            for i, row in enumerate(reader):
-                if i >= (skiprows-1):
-                    break
-
-        return fh, reader
-
-    process_skiprows(reader)
-
-    def ismissing(name, val):
-        "Should the value val in column name be masked?"
-        return val == missing or val == missingd.get(name) or val == ''
-
-    def with_default_value(func, default):
-        def newfunc(name, val):
-            if ismissing(name, val):
-                return default
-            else:
-                return func(val)
-        return newfunc
-
-    def mybool(x):
-        if x == 'True':
-            return True
-        elif x == 'False':
-            return False
-        else:
-            raise ValueError('invalid bool')
-
-    dateparser = dateutil.parser.parse
-
-    def mydateparser(x):
-        # try and return a datetime object
-        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
-        return d
-
-    mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))
-
-    myfloat = with_default_value(float, np.nan)
-    myint = with_default_value(int, -1)
-    mystr = with_default_value(str, '')
-    mybool = with_default_value(mybool, None)
-
-    def mydate(x):
-        # try and return a date object
-        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
-
-        if d.hour > 0 or d.minute > 0 or d.second > 0:
-            raise ValueError('not a date')
-        return d.date()
-    mydate = with_default_value(mydate, datetime.date(1, 1, 1))
-
-    def get_func(name, item, func):
-        # promote functions in this order
-        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
-        for func in funcs[funcs.index(func):]:
-            try:
-                func(name, item)
-            except Exception:
-                continue
-            return func
-        raise ValueError('Could not find a working conversion function')
-
-    # map column names that clash with builtins -- TODO - extend this list
-    itemd = {
-        'return': 'return_',
-        'file':   'file_',
-        'print':  'print_',
-        }
-
-    def get_converters(reader, comments):
-
-        converters = None
-        i = 0
-        for row in reader:
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            if i == 0:
-                converters = [mybool]*len(row)
-            if checkrows and i > checkrows:
-                break
-            i += 1
-
-            for j, (name, item) in enumerate(zip(names, row)):
-                func = converterd.get(j)
-                if func is None:
-                    func = converterd.get(name)
-                if func is None:
-                    func = converters[j]
-                    if len(item.strip()):
-                        func = get_func(name, item, func)
-                else:
-                    # how should we handle custom converters and defaults?
-                    func = with_default_value(func, None)
-                converters[j] = func
-        return converters
-
-    # Get header and remove invalid characters
-    needheader = names is None
-
-    if needheader:
-        for row in reader:
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            headers = row
-            break
-
-        # remove these chars
-        delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
-        delete.add('"')
-
-        names = []
-        seen = dict()
-        for i, item in enumerate(headers):
-            item = item.strip().lower().replace(' ', '_')
-            item = ''.join([c for c in item if c not in delete])
-            if not len(item):
-                item = 'column%d' % i
-
-            item = itemd.get(item, item)
-            cnt = seen.get(item, 0)
-            if cnt > 0:
-                names.append(item + '_%d' % cnt)
-            else:
-                names.append(item)
-            seen[item] = cnt+1
-
-    else:
-        if isinstance(names, str):
-            names = [n.strip() for n in names.split(',')]
-
-    # get the converter functions by inspecting checkrows
-    converters = get_converters(reader, comments)
-    if converters is None:
-        raise ValueError('Could not find any valid data in CSV file')
-
-    # reset the reader and start over
-    fh.seek(0)
-    reader = csv.reader(fh, delimiter=delimiter)
-    process_skiprows(reader)
-
-    if needheader:
-        while True:
-            # skip past any comments and consume one line of column header
-            row = next(reader)
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            break
-
-    # iterate over the remaining rows and convert the data to date
-    # objects, ints, or floats as appropriate
-    rows = []
-    rowmasks = []
-    for i, row in enumerate(reader):
-        if not len(row):
-            continue
-        if comments is not None and row[0].startswith(comments):
-            continue
-        # Ensure that the row returned always has the same nr of elements
-        row.extend([''] * (len(converters) - len(row)))
-        rows.append([func(name, val)
-                     for func, name, val in zip(converters, names, row)])
-        rowmasks.append([ismissing(name, val)
-                         for name, val in zip(names, row)])
-    fh.close()
-
-    if not len(rows):
-        return None
-
-    if use_mrecords and np.any(rowmasks):
-        r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
-    else:
-        r = np.rec.fromrecords(rows, names=names)
-    return r
-
-
 # a series of classes for describing the format intentions of various rec views
 @cbook.deprecated("2.2")
 class FormatObj(object):
@@ -2484,78 +2203,6 @@ def format(item, just_pad_prec_spacer):
     return text
 
 
-@cbook.deprecated("2.2", alternative='numpy.recarray.tofile')
-def rec2csv(r, fname, delimiter=',', formatd=None, missing='',
-            missingd=None, withheader=True):
-    """
-    Save the data from numpy recarray *r* into a
-    comma-/space-/tab-delimited file.  The record array dtype names
-    will be used for column headers.
-
-    *fname*: can be a filename or a file handle.  Support for gzipped
-      files is automatic, if the filename ends in '.gz'
-
-    *withheader*: if withheader is False, do not write the attribute
-      names in the first row
-
-    for formatd type FormatFloat, we override the precision to store
-    full precision floats in the CSV file
-
-    See Also
-    --------
-    :func:`csv2rec`
-        For information about *missing* and *missingd*, which can be used to
-        fill in masked values into your CSV file.
-    """
-
-    delimiter = str(delimiter)
-
-    if missingd is None:
-        missingd = dict()
-
-    def with_mask(func):
-        def newfunc(val, mask, mval):
-            if mask:
-                return mval
-            else:
-                return func(val)
-        return newfunc
-
-    if r.ndim != 1:
-        raise ValueError('rec2csv only operates on 1 dimensional recarrays')
-
-    formatd = get_formatd(r, formatd)
-    funcs = []
-    for i, name in enumerate(r.dtype.names):
-        funcs.append(with_mask(csvformat_factory(formatd[name]).tostr))
-
-    fh, opened = cbook.to_filehandle(fname, 'wb', return_opened=True)
-    writer = csv.writer(fh, delimiter=delimiter)
-    header = r.dtype.names
-    if withheader:
-        writer.writerow(header)
-
-    # Our list of specials for missing values
-    mvals = []
-    for name in header:
-        mvals.append(missingd.get(name, missing))
-
-    ismasked = False
-    if len(r):
-        row = r[0]
-        ismasked = hasattr(row, '_fieldmask')
-
-    for row in r:
-        if ismasked:
-            row, rowmask = row.item(), row._fieldmask.item()
-        else:
-            rowmask = [False] * len(row)
-        writer.writerow([func(val, mask, mval) for func, val, mask, mval
-                         in zip(funcs, row, rowmask, mvals)])
-    if opened:
-        fh.close()
-
-
 @cbook.deprecated('2.2', alternative='scipy.interpolate.griddata')
 def griddata(x, y, z, xi, yi, interp='nn'):
     """
diff --git a/lib/matplotlib/pylab.py b/lib/matplotlib/pylab.py
@@ -230,14 +230,14 @@
 ## We are still importing too many things from mlab; more cleanup is needed.
 
 from matplotlib.mlab import (
-    amap, base_repr, binary_repr, csv2rec,
+    amap, base_repr, binary_repr,
     demean, detrend, detrend_linear, detrend_mean, detrend_none,
     distances_along_curve, exp_safe,
     fftsurr, frange, griddata,
     identity, inside_poly, is_closed_polygon, ispower2, isvector, l1norm,
     l2norm, log2, movavg, norm_flat,
     path_length, poly_below, poly_between,
-    rec2csv, rec_append_fields, rec_drop_fields, rec_join, rms_flat,
+    rec_append_fields, rec_drop_fields, rec_join, rms_flat,
     segments_intersect, slopes, stineman_interp, vector_lengths,
     window_hanning, window_none)
 
diff --git a/lib/matplotlib/tests/test_mlab.py b/lib/matplotlib/tests/test_mlab.py