matplotlib · tacaswell · Feb 11, 2018 · Nov 13, 2017 · Nov 14, 2017 · Feb 8, 2018
diff --git a/.appveyor.yml b/.appveyor.yml
@@ -66,7 +66,7 @@ install:
   - activate test-environment
   - echo %PYTHON_VERSION% %TARGET_ARCH%
   # pytest-cov>=2.3.1 due to https://github.com/pytest-dev/pytest-cov/issues/124
-  - pip install -q "pytest!=3.3.0" "pytest-cov>=2.3.1" pytest-rerunfailures pytest-timeout pytest-xdist
+  - pip install -q "pytest!=3.3.0,>=3.2.0" "pytest-cov>=2.3.1" pytest-rerunfailures pytest-timeout pytest-xdist
 
   # Apply patch to `subprocess` on Python versions > 2 and < 3.6.3
   # https://github.com/matplotlib/matplotlib/issues/9176

diff --git a/.travis.yml b/.travis.yml
@@ -52,7 +52,7 @@ env:
     - NUMPY=numpy
     - PANDAS=
     - PYPARSING=pyparsing
-    - PYTEST=pytest!=3.3.0
+    - PYTEST='pytest!=3.3.0,>=3.2.0'
     - PYTEST_COV=pytest-cov
     - PYTEST_PEP8=
     - SPHINX=sphinx

diff --git a/doc/api/next_api_changes/2018-02-10-HA.rst b/doc/api/next_api_changes/2018-02-10-HA.rst
@@ -0,0 +1,10 @@
+Deprecated `Axis.unit_data`
+```````````````````````````
+
+Use `Axis.units` (which has long existed) instead.
+
+Only accept string-like for Categorical input
+`````````````````````````````````````````````
+
+Do not accept mixed string / float / int input, only
+strings are valid categoricals.
diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py
@@ -719,7 +719,7 @@ def __init__(self, axes, pickradius=15):
         self.label = self._get_label()
         self.labelpad = rcParams['axes.labelpad']
         self.offsetText = self._get_offset_text()
-        self.unit_data = None
+
         self.pickradius = pickradius
 
         # Initialize here for testing; later add API
@@ -777,15 +777,14 @@ def limit_range_for_scale(self, vmin, vmax):
         return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos())
 
     @property
+    @cbook.deprecated("2.2.0")
     def unit_data(self):
-        """Holds data that a ConversionInterface subclass uses
-        to convert between labels and indexes
-        """
-        return self._unit_data
+        return self.units
 
     @unit_data.setter
+    @cbook.deprecated("2.2.0")
     def unit_data(self, unit_data):
-        self._unit_data = unit_data
+        self.set_units(unit_data)
 
     def get_children(self):
         children = [self.label, self.offsetText]

diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py
@@ -1,123 +1,206 @@
-# -*- coding: utf-8 OA-*-za
+# -*- coding: utf-8 -*-
 """
-catch all for categorical functions
+Module that allows plotting of string "category" data.  i.e.
+``plot(['d', 'f', 'a'],[1, 2, 3])`` will plot three points with x-axis
+values of 'd', 'f', 'a'.
+
+See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an
+example.
+
+The module uses Matplotlib's `matplotlib.units` mechanism to convert from
+strings to integers, provides a tick locator and formatter, and the
+:class:`.UnitData` that creates and stores the string-to-integer mapping.
 """
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
+
+from collections import OrderedDict
+import itertools
+
 import six
 
+
 import numpy as np
 
 import matplotlib.units as units
 import matplotlib.ticker as ticker
 
 # np 1.6/1.7 support
 from distutils.version import LooseVersion
-import collections
-
-
-if LooseVersion(np.__version__) >= LooseVersion('1.8.0'):
-    def shim_array(data):
-        return np.array(data, dtype=np.unicode)
-else:
-    def shim_array(data):
-        if (isinstance(data, six.string_types) or
-                not isinstance(data, collections.Iterable)):
-            data = [data]
-        try:
-            data = [str(d) for d in data]
-        except UnicodeEncodeError:
-            # this yields gibberish but unicode text doesn't
-            # render under numpy1.6 anyway
-            data = [d.encode('utf-8', 'ignore').decode('utf-8')
-                    for d in data]
-        return np.array(data, dtype=np.unicode)
+
+VALID_TYPES = tuple(set(six.string_types +
+                        (bytes, six.text_type, np.str_, np.bytes_)))
 
 
 class StrCategoryConverter(units.ConversionInterface):
     @staticmethod
     def convert(value, unit, axis):
-        """Uses axis.unit_data map to encode
-        data as floats
+        """Converts strings in value to floats using
+        mapping information stored in the unit object
+
+        Parameters
+        ----------
+        value : string or iterable
+            value or list of values to be converted
+        unit : :class:`.UnitData`
+            object storing string unit information for value
+        axis : :class:`~matplotlib.axis.Axis`
+            axis on which the converted value is plotted
+
+        Returns
+        -------
+        mapped_value : float or ndarray[float]
+
+        .. note:: axis is not used in this function
         """
-        value = np.atleast_1d(value)
-        # try and update from here....
-        if hasattr(axis.unit_data, 'update'):
-            for val in value:
-                if isinstance(val, six.string_types):
-                    axis.unit_data.update(val)
-        vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs))
+        # dtype = object preserves numerical pass throughs
+        values = np.atleast_1d(np.array(value, dtype=object))
 
-        if isinstance(value, six.string_types):
-            return vmap[value]
+        # pass through sequence of non binary numbers
+        if all((units.ConversionInterface.is_numlike(v) and
+                not isinstance(v, VALID_TYPES)) for v in values):
+            return np.asarray(values, dtype=float)
 
-        vals = shim_array(value)
+        # force an update so it also does type checking
+        unit.update(values)
 
-        for lab, loc in vmap.items():
-            vals[vals == lab] = loc
+        str2idx = np.vectorize(unit._mapping.__getitem__,
+                               otypes=[float])
 
-        return vals.astype('float')
+        mapped_value = str2idx(values)
+        return mapped_value
 
     @staticmethod
     def axisinfo(unit, axis):
-        majloc = StrCategoryLocator(axis.unit_data.locs)
-        majfmt = StrCategoryFormatter(axis.unit_data.seq)
+        """Sets the default axis ticks and labels
+
+        Parameters
+        ----------
+        unit : :class:`.UnitData`
+            object storing string unit information for value
+        axis : :class:`~matplotlib.axis.Axis`
+            axis for which information is being set
+
+        Returns
+        -------
+        :class:`~matplotlib.units.AxisInfo`
+            Information to support default tick labeling
+
+        .. note:: axis is not used
+        """
+        # locator and formatter take mapping dict because
+        # args need to be passed by reference for updates
+        majloc = StrCategoryLocator(unit._mapping)
+        majfmt = StrCategoryFormatter(unit._mapping)
         return units.AxisInfo(majloc=majloc, majfmt=majfmt)
 
     @staticmethod
     def default_units(data, axis):
-        # the conversion call stack is:
+        """Sets and updates the :class:`~matplotlib.axis.Axis` units
+
+        Parameters
+        ----------
+        data : string or iterable of strings
+        axis : :class:`~matplotlib.axis.Axis`
+            axis on which the data is plotted
+
+        Returns
+        -------
+        :class:`.UnitData`
+            object storing string to integer mapping
+        """
+        # the conversion call stack is supposed to be
         # default_units->axis_info->convert
-        if axis.unit_data is None:
-            axis.unit_data = UnitData(data)
+        if axis.units is None:
+            axis.set_units(UnitData(data))
         else:
-            axis.unit_data.update(data)
-        return None
+            axis.units.update(data)
+        return axis.units
+
+
+class StrCategoryLocator(ticker.Locator):
+    """tick at every integer mapping of the string data"""
+    def __init__(self, units_mapping):
+        """
+        Parameters
+        ----------
+        units_mapping : dict
+             string:integer mapping
+        """
+        self._units = units_mapping
 
+    def __call__(self):
+        return list(self._units.values())
 
-class StrCategoryLocator(ticker.FixedLocator):
-    def __init__(self, locs):
-        self.locs = locs
-        self.nbins = None
+    def tick_values(self, vmin, vmax):
+        return self()
 
 
-class StrCategoryFormatter(ticker.FixedFormatter):
-    def __init__(self, seq):
-        self.seq = seq
-        self.offset_string = ''
+class StrCategoryFormatter(ticker.Formatter):
+    """String representation of the data at every tick"""
+    def __init__(self, units_mapping):
+        """
+        Parameters
+        ----------
+        units_mapping : dict
+            string:integer mapping
+        """
+        self._units = units_mapping
 
+    def __call__(self, x, pos=None):
+        if pos is None:
+            return ""
+        r_mapping = {v: StrCategoryFormatter._text(k)
+                     for k, v in self._units.items()}
+        return r_mapping.get(int(np.round(x)), '')
 
-class UnitData(object):
-    # debatable makes sense to special code missing values
-    spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}
+    @staticmethod
+    def _text(value):
+        """Converts text values into `utf-8` or `ascii` strings
+        """
+        if LooseVersion(np.__version__) < LooseVersion('1.7.0'):
+            if (isinstance(value, (six.text_type, np.unicode))):
+                value = value.encode('utf-8', 'ignore').decode('utf-8')
+        if isinstance(value, (np.bytes_, six.binary_type)):
+            value = value.decode(encoding='utf-8')
+        elif not isinstance(value, (np.str_, six.string_types)):
+            value = str(value)
+        return value
 
-    def __init__(self, data):
-        """Create mapping between unique categorical values
-        and numerical identifier
 
-        Parameters
+class UnitData(object):
+    def __init__(self, data=None):
+        """Create mapping between unique categorical values
+        and integer identifiers
         ----------
         data: iterable
-            sequence of values
+              sequence of string values
         """
-        self.seq, self.locs = [], []
-        self._set_seq_locs(data, 0)
-
-    def update(self, new_data):
-        # so as not to conflict with spdict
-        value = max(max(self.locs) + 1, 0)
-        self._set_seq_locs(new_data, value)
-
-    def _set_seq_locs(self, data, value):
-        strdata = shim_array(data)
-        new_s = [d for d in np.unique(strdata) if d not in self.seq]
-        for ns in new_s:
-            self.seq.append(ns)
-            if ns in UnitData.spdict:
-                self.locs.append(UnitData.spdict[ns])
-            else:
-                self.locs.append(value)
-                value += 1
+        self._mapping = OrderedDict()
+        self._counter = itertools.count(start=0)
+        if data is not None:
+            self.update(data)
+
+    def update(self, data):
+        """Maps new values to integer identifiers.
+
+        Parameters
+        ----------
+        data: iterable
+              sequence of string values
+
+        Raises
+        ------
+        TypeError
+              If a value in data is not a string, unicode, or bytes type
+        """
+        data = np.atleast_1d(np.array(data, dtype=object))
+
+        for val in OrderedDict.fromkeys(data):
+            if not isinstance(val, VALID_TYPES):
+                raise TypeError("{val!r} is not a string".format(val=val))
+            if val not in self._mapping:
+                self._mapping[val] = next(self._counter)
 
 
 # Connects the convertor to matplotlib