Merge pull request #16288 from anntzer/csv2rec

jklymak · web-flow · commit d14ad5f36a16 · 2020-01-22T13:53:50.000-08:00
Remove the private, unused _csv2rec.
diff --git a/doc/faq/howto_faq.rst b/doc/faq/howto_faq.rst
@@ -336,37 +336,13 @@ setting in the right subplots.
 Skip dates where there is no data
 ---------------------------------
 
-When plotting time series, e.g., financial time series, one often wants
-to leave out days on which there is no data, e.g., weekends.  By passing
-in dates on the x-xaxis, you get large horizontal gaps on periods when
-there is not data. The solution is to pass in some proxy x-data, e.g.,
-evenly sampled indices, and then use a custom formatter to format
-these as dates. The example below shows how to use an 'index formatter'
-to achieve the desired plot::
-
-    import numpy as np
-    import matplotlib.pyplot as plt
-    import matplotlib.mlab as mlab
-    import matplotlib.ticker as ticker
-
-    r = mlab.csv2rec('../data/aapl.csv')
-    r.sort()
-    r = r[-30:]  # get the last 30 days
-
-    N = len(r)
-    ind = np.arange(N)  # the evenly spaced plot indices
-
-    def format_date(x, pos=None):
-        thisind = np.clip(int(x+0.5), 0, N-1)
-        return r.date[thisind].strftime('%Y-%m-%d')
-
-    fig = plt.figure()
-    ax = fig.add_subplot(111)
-    ax.plot(ind, r.adj_close, 'o-')
-    ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
-    fig.autofmt_xdate()
-
-    plt.show()
+When plotting time series, e.g., financial time series, one often wants to
+leave out days on which there is no data, e.g., weekends.  By passing in
+dates on the x-axis, you get large horizontal gaps on periods when there
+is no data. The solution is to pass in some proxy x-data, e.g., evenly
+sampled indices, and then use a custom formatter to format these as dates.
+:doc:`/gallery/text_labels_and_annotations/date_index_formatter` demonstrates
+how to use an 'index formatter' to achieve the desired plot.
 
 .. _howto-set-zorder:
 
diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py
@@ -53,7 +53,6 @@
     Apply a window along a given axis
 """
 
-import csv
 import functools
 from numbers import Number
 
@@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
     return Cxy, f
 
 
-def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
-             converterd=None, names=None, missing='', missingd=None,
-             use_mrecords=False, dayfirst=False, yearfirst=False):
-    """
-    Load data from comma/space/tab delimited file in *fname* into a
-    numpy record array and return the record array.
-
-    If *names* is *None*, a header row is required to automatically
-    assign the recarray names.  The headers will be lower cased,
-    spaces will be converted to underscores, and illegal attribute
-    name characters removed.  If *names* is not *None*, it is a
-    sequence of names to use for the column names.  In this case, it
-    is assumed there is no header row.
-
-
-    - *fname*: can be a filename or a file handle.  Support for gzipped
-      files is automatic, if the filename ends in '.gz'
-
-    - *comments*: the character used to indicate the start of a comment
-      in the file, or *None* to switch off the removal of comments
-
-    - *skiprows*: is the number of rows from the top to skip
-
-    - *checkrows*: is the number of rows to check to validate the column
-      data type.  When set to zero all rows are validated.
-
-    - *converterd*: if not *None*, is a dictionary mapping column number or
-      munged column name to a converter function.
-
-    - *names*: if not None, is a list of header names.  In this case, no
-      header will be read from the file
-
-    - *missingd* is a dictionary mapping munged column names to field values
-      which signify that the field does not contain actual data and should
-      be masked, e.g., '0000-00-00' or 'unused'
-
-    - *missing*: a string whose value signals a missing field regardless of
-      the column it appears in
-
-    - *use_mrecords*: if True, return an mrecords.fromrecords record array if
-      any of the data are missing
-
-    - *dayfirst*: default is False so that MM-DD-YY has precedence over
-      DD-MM-YY.  See
-      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
-      for further information.
-
-    - *yearfirst*: default is False so that MM-DD-YY has precedence over
-      YY-MM-DD. See
-      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
-      for further information.
-
-      If no rows are found, *None* is returned
-    """
-
-    if converterd is None:
-        converterd = dict()
-
-    if missingd is None:
-        missingd = {}
-
-    import dateutil.parser
-    import datetime
-
-    fh = cbook.to_filehandle(fname)
-
-    delimiter = str(delimiter)
-
-    class FH:
-        """
-        For space-delimited files, we want different behavior than
-        comma or tab.  Generally, we want multiple spaces to be
-        treated as a single separator, whereas with comma and tab we
-        want multiple commas to return multiple (empty) fields.  The
-        join/strip trick below effects this.
-        """
-        def __init__(self, fh):
-            self.fh = fh
-
-        def close(self):
-            self.fh.close()
-
-        def seek(self, arg):
-            self.fh.seek(arg)
-
-        def fix(self, s):
-            return ' '.join(s.split())
-
-        def __next__(self):
-            return self.fix(next(self.fh))
-
-        def __iter__(self):
-            for line in self.fh:
-                yield self.fix(line)
-
-    if delimiter == ' ':
-        fh = FH(fh)
-
-    reader = csv.reader(fh, delimiter=delimiter)
-
-    def process_skiprows(reader):
-        if skiprows:
-            for i, row in enumerate(reader):
-                if i >= (skiprows-1):
-                    break
-
-        return fh, reader
-
-    process_skiprows(reader)
-
-    def ismissing(name, val):
-        """Return whether the value val in column name should be masked."""
-        return val == missing or val == missingd.get(name) or val == ''
-
-    def with_default_value(func, default):
-        def newfunc(name, val):
-            if ismissing(name, val):
-                return default
-            else:
-                return func(val)
-        return newfunc
-
-    def mybool(x):
-        if x == 'True':
-            return True
-        elif x == 'False':
-            return False
-        else:
-            raise ValueError('invalid bool')
-
-    dateparser = dateutil.parser.parse
-
-    def mydateparser(x):
-        # try and return a datetime object
-        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
-        return d
-
-    mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))
-
-    myfloat = with_default_value(float, np.nan)
-    myint = with_default_value(int, -1)
-    mystr = with_default_value(str, '')
-    mybool = with_default_value(mybool, None)
-
-    def mydate(x):
-        # try and return a date object
-        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
-
-        if d.hour > 0 or d.minute > 0 or d.second > 0:
-            raise ValueError('not a date')
-        return d.date()
-    mydate = with_default_value(mydate, datetime.date(1, 1, 1))
-
-    def get_func(name, item, func):
-        # promote functions in this order
-        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
-        for func in funcs[funcs.index(func):]:
-            try:
-                func(name, item)
-            except Exception:
-                continue
-            return func
-        raise ValueError('Could not find a working conversion function')
-
-    # map column names that clash with builtins -- TODO - extend this list
-    itemd = {
-        'return': 'return_',
-        'file':   'file_',
-        'print':  'print_',
-        }
-
-    def get_converters(reader, comments):
-
-        converters = None
-        i = 0
-        for row in reader:
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            if i == 0:
-                converters = [mybool]*len(row)
-            if checkrows and i > checkrows:
-                break
-            i += 1
-
-            for j, (name, item) in enumerate(zip(names, row)):
-                func = converterd.get(j)
-                if func is None:
-                    func = converterd.get(name)
-                if func is None:
-                    func = converters[j]
-                    if len(item.strip()):
-                        func = get_func(name, item, func)
-                else:
-                    # how should we handle custom converters and defaults?
-                    func = with_default_value(func, None)
-                converters[j] = func
-        return converters
-
-    # Get header and remove invalid characters
-    needheader = names is None
-
-    if needheader:
-        for row in reader:
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            headers = row
-            break
-
-        # remove these chars
-        delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
-        delete.add('"')
-
-        names = []
-        seen = dict()
-        for i, item in enumerate(headers):
-            item = item.strip().lower().replace(' ', '_')
-            item = ''.join([c for c in item if c not in delete])
-            if not len(item):
-                item = 'column%d' % i
-
-            item = itemd.get(item, item)
-            cnt = seen.get(item, 0)
-            if cnt > 0:
-                names.append(item + '_%d' % cnt)
-            else:
-                names.append(item)
-            seen[item] = cnt+1
-
-    else:
-        if isinstance(names, str):
-            names = [n.strip() for n in names.split(',')]
-
-    # get the converter functions by inspecting checkrows
-    converters = get_converters(reader, comments)
-    if converters is None:
-        raise ValueError('Could not find any valid data in CSV file')
-
-    # reset the reader and start over
-    fh.seek(0)
-    reader = csv.reader(fh, delimiter=delimiter)
-    process_skiprows(reader)
-
-    if needheader:
-        while True:
-            # skip past any comments and consume one line of column header
-            row = next(reader)
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            break
-
-    # iterate over the remaining rows and convert the data to date
-    # objects, ints, or floats as appropriate
-    rows = []
-    rowmasks = []
-    for i, row in enumerate(reader):
-        if not len(row):
-            continue
-        if comments is not None and row[0].startswith(comments):
-            continue
-        # Ensure that the row returned always has the same nr of elements
-        row.extend([''] * (len(converters) - len(row)))
-        rows.append([func(name, val)
-                     for func, name, val in zip(converters, names, row)])
-        rowmasks.append([ismissing(name, val)
-                         for name, val in zip(names, row)])
-    fh.close()
-
-    if not len(rows):
-        return None
-
-    if use_mrecords and np.any(rowmasks):
-        r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
-    else:
-        r = np.rec.fromrecords(rows, names=names)
-    return r
-
-
 class GaussianKDE:
     """
     Representation of a kernel-density estimate using Gaussian kernels.
diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py
@@ -45,7 +45,7 @@
 from matplotlib.artist import Artist
 from matplotlib.axes import Axes, Subplot
 from matplotlib.projections import PolarAxes
-from matplotlib import mlab  # for _csv2rec, detrend_none, window_hanning
+from matplotlib import mlab  # for detrend_none, window_hanning
 from matplotlib.scale import get_scale_docs, get_scale_names
 
 from matplotlib import cm
diff --git a/lib/matplotlib/tests/test_mlab.py b/lib/matplotlib/tests/test_mlab.py