From 4c9353f80faec550401e126103b0917b6e49168f Mon Sep 17 00:00:00 2001
From: hannah <story645@gmail.com>
Date: Mon, 13 Nov 2017 17:36:17 -0500
Subject: [PATCH 1/5] updated tests to dicts

---
 lib/matplotlib/tests/test_category.py | 79 +++++++++++++--------------
 1 file changed, 39 insertions(+), 40 deletions(-)

diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py
index 7156dc59933c..f0d4eeddf138 100644
--- a/lib/matplotlib/tests/test_category.py
+++ b/lib/matplotlib/tests/test_category.py
@@ -3,25 +3,26 @@
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
 
+import six
 import pytest
 import numpy as np
 
+
 import matplotlib.pyplot as plt
 import matplotlib.category as cat
 
-import unittest
 
 
 class TestUnitData(object):
-    testdata = [("hello world", ["hello world"], [0]),
-                ("Здравствуйте мир", ["Здравствуйте мир"], [0]),
-                (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf],
-                 ['-inf', '3.14', 'A', 'B', 'inf', 'nan'],
-                 [-3.0, 0, 1, 2, -2.0, -1.0])]
-
-    ids = ["single", "unicode", "mixed"]
-
-    @pytest.mark.parametrize("data, seq, locs", testdata, ids=ids)
+    test_cases = {'single': ("hello world", ["hello world"], [0]),
+                  'unicode': ("Здравствуйте мир", ["Здравствуйте мир"], [0]),
+                  'mixed': (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf],
+                       ['-inf', '3.14', 'A', 'B', 'inf', 'nan'],
+                       [-3.0, 0, 1, 2, -2.0, -1.0])}
+ 
+    ids, data = zip(*six.iteritems(test_cases))
+
+    @pytest.mark.parametrize("data, seq, locs", data, ids=ids)
     def test_unit(self, data, seq, locs):
         act = cat.UnitData(data)
         assert act.seq == seq
@@ -63,22 +64,26 @@ class TestStrCategoryConverter(object):
     ref: /pandas/tseries/tests/test_converter.py
          /pandas/tests/test_algos.py:TestFactorize
     """
-    testdata = [("Здравствуйте мир", [("Здравствуйте мир", 42)], 42),
-                ("hello world", [("hello world", 42)], 42),
-                (['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
-                 [('a', 0), ('b', 1), ('c', 2)],
-                 [0, 1, 1, 0, 0, 2, 2, 2]),
-                (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf],
-                 [('nan', -1), ('3.14', 0), ('A', 1), ('B', 2),
-                  ('-inf', 100), ('inf', 200)],
-                 [1, 1, -1, 2, 100, 0, 200])]
-    ids = ["unicode", "single", "basic", "mixed"]
+
+    test_cases = {"unicode": ("Здравствуйте мир", [("Здравствуйте мир", 42)], 42),
+                  "ascii" : ("hello world", [("hello world", 42)], 42),
+                  "single" : (['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
+                          [('a', 0), ('b', 1), ('c', 2)],
+                            [0, 1, 1, 0, 0, 2, 2, 2]),
+                  "mixed": (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf],
+                      [('nan', -1), ('3.14', 0), ('A', 1), ('B', 2),
+                        ('-inf', 100), ('inf', 200)], 
+                        [1, 1, -1, 2, 100, 0, 200]),
+                 "integer string": (["!", "0"], [("!", 0), ("0", 1)], [0, 1]),
+                 "number": (0.0, [(0.0, 0.0)], 0.0)}
+
+    ids, data = zip(*six.iteritems(test_cases))
 
     @pytest.fixture(autouse=True)
     def mock_axis(self, request):
         self.cc = cat.StrCategoryConverter()
 
-    @pytest.mark.parametrize("data, unitmap, exp", testdata, ids=ids)
+    @pytest.mark.parametrize("data, unitmap, exp", data, ids=ids)
     def test_convert(self, data, unitmap, exp):
         MUD = MockUnitData(unitmap)
         axis = FakeAxis(MUD)
@@ -104,7 +109,7 @@ def test_StrCategoryLocator(self):
         np.testing.assert_array_equal(ticks.tick_values(None, None), locs)
 
 
-class TestStrCategoryFormatter(unittest.TestCase):
+class TestStrCategoryFormatter(object):
     def test_StrCategoryFormatter(self):
         seq = ["hello", "world", "hi"]
         labels = cat.StrCategoryFormatter(seq)
@@ -121,24 +126,18 @@ def lt(tl):
 
 
 class TestPlot(object):
-    bytes_data = [
-        ['a', 'b', 'c'],
-        [b'a', b'b', b'c'],
-        np.array([b'a', b'b', b'c'])
-    ]
-
-    bytes_ids = ['string list', 'bytes list', 'bytes ndarray']
-
-    numlike_data = [
-        ['1', '11', '3'],
-        np.array(['1', '11', '3']),
-        [b'1', b'11', b'3'],
-        np.array([b'1', b'11', b'3']),
-    ]
-
-    numlike_ids = [
-        'string list', 'string ndarray', 'bytes list', 'bytes ndarray'
-    ]
+    bytes_cases = {'string list': ['a', 'b', 'c'],
+                    'bytes list': [b'a', b'b', b'c'],
+                    'bytes ndarray' : np.array([b'a', b'b', b'c'])}
+
+    bytes_ids, bytes_data = zip(*six.iteritems(bytes_cases))
+
+    numlike_cases = {'string list': ['1', '11', '3'],
+                     'string ndarray' : np.array(['1', '11', '3']),
+                     'bytes list' : [b'1', b'11', b'3'],
+                     'bytes ndarray' : np.array([b'1', b'11', b'3'])}
+
+    numlike_ids, numlike_data = zip(*six.iteritems(numlike_cases))
 
     @pytest.fixture
     def data(self):

From 34b8eb46e5de6b760bc131e461755042716e259d Mon Sep 17 00:00:00 2001
From: hannah <story645@gmail.com>
Date: Mon, 13 Nov 2017 21:01:13 -0500
Subject: [PATCH 2/5] category bug fix + new tests + refactor

---
 lib/matplotlib/axes/_axes.py          |  27 +-
 lib/matplotlib/axis.py                |  10 +-
 lib/matplotlib/category.py            | 192 +++++++-----
 lib/matplotlib/tests/test_axes.py     |   8 +
 lib/matplotlib/tests/test_category.py | 416 ++++++++++++++------------
 5 files changed, 361 insertions(+), 292 deletions(-)

diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py
index 5622082009bb..e4ce15ec4dfc 100644
--- a/lib/matplotlib/axes/_axes.py
+++ b/lib/matplotlib/axes/_axes.py
@@ -5892,7 +5892,7 @@ def hist(self, x, bins=None, range=None, density=None, weights=None,
         Parameters
         ----------
         x : (n,) array or sequence of (n,) arrays
-            Input values, this takes either a single array or a sequency of
+            Input values, this takes either a single array or a sequence of
             arrays which are not required to be of the same length
 
         bins : integer or sequence or 'auto', optional
@@ -6104,30 +6104,31 @@ def hist(self, x, bins=None, range=None, density=None, weights=None,
                              "Please only use 'density', since 'normed'"
                              "will be deprecated.")
 
-        # process the unit information
-        self._process_unit_info(xdata=x, kwargs=kwargs)
-        x = self.convert_xunits(x)
-        if bin_range is not None:
-            bin_range = self.convert_xunits(bin_range)
-
-        # Check whether bins or range are given explicitly.
-        binsgiven = (cbook.iterable(bins) or bin_range is not None)
-
         # basic input validation
         input_empty = np.size(x) == 0
-
         # Massage 'x' for processing.
         if input_empty:
-            x = np.array([[]])
+            x = [np.array([])]
         else:
             x = cbook._reshape_2D(x, 'x')
         nx = len(x)  # number of datasets
 
+        # Process unit information
+        # Unit conversion is done individually on each dataset
+        self._process_unit_info(xdata=x[0], kwargs=kwargs)
+        x = [self.convert_xunits(xi) for xi in x]
+
+        if bin_range is not None:
+            bin_range = self.convert_xunits(bin_range)
+
+        # Check whether bins or range are given explicitly.
+        binsgiven = (cbook.iterable(bins) or bin_range is not None)
+
         # We need to do to 'weights' what was done to 'x'
         if weights is not None:
             w = cbook._reshape_2D(weights, 'weights')
         else:
-            w = [None]*nx
+            w = [None] * nx
 
         if len(w) != nx:
             raise ValueError('weights should have the same shape as x')
diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py
index 948e3ae1386f..f9717fa73f65 100644
--- a/lib/matplotlib/axis.py
+++ b/lib/matplotlib/axis.py
@@ -668,7 +668,6 @@ def __init__(self, axes, pickradius=15):
         self.offsetText = self._get_offset_text()
         self.majorTicks = []
         self.minorTicks = []
-        self.unit_data = None
         self.pickradius = pickradius
 
         # Initialize here for testing; later add API
@@ -720,15 +719,14 @@ def limit_range_for_scale(self, vmin, vmax):
         return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos())
 
     @property
+    @cbook.deprecated("2.1.1")
     def unit_data(self):
-        """Holds data that a ConversionInterface subclass uses
-        to convert between labels and indexes
-        """
-        return self._unit_data
+        return self.units
 
     @unit_data.setter
+    @cbook.deprecated("2.1.1")
     def unit_data(self, unit_data):
-        self._unit_data = unit_data
+        self.set_units(unit_data)
 
     def get_children(self):
         children = [self.label, self.offsetText]
diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py
index d2754d32fd3a..1dd7c8aa469d 100644
--- a/lib/matplotlib/category.py
+++ b/lib/matplotlib/category.py
@@ -1,9 +1,13 @@
-# -*- coding: utf-8 OA-*-za
+# -*- coding: utf-8 -*-
 """
 catch all for categorical functions
 """
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
+
+from collections import Iterable, OrderedDict
+import itertools
+
 import six
 
 import numpy as np
@@ -13,111 +17,149 @@
 
 # np 1.6/1.7 support
 from distutils.version import LooseVersion
-import collections
 
+VALID_TYPES = tuple(set(six.string_types +
+                        (bytes, six.text_type, np.str_, np.bytes_)))
 
-if LooseVersion(np.__version__) >= LooseVersion('1.8.0'):
-    def shim_array(data):
-        return np.array(data, dtype=np.unicode)
-else:
-    def shim_array(data):
-        if (isinstance(data, six.string_types) or
-                not isinstance(data, collections.Iterable)):
-            data = [data]
-        try:
-            data = [str(d) for d in data]
-        except UnicodeEncodeError:
-            # this yields gibberish but unicode text doesn't
-            # render under numpy1.6 anyway
-            data = [d.encode('utf-8', 'ignore').decode('utf-8')
-                    for d in data]
-        return np.array(data, dtype=np.unicode)
+
+def to_str(value):
+    """Helper function to turn values to strings.
+    """
+    # Note: This function is only used by StrCategoryFormatter
+    if LooseVersion(np.__version__) < LooseVersion('1.7.0'):
+        if (isinstance(value, (six.text_type, np.unicode))):
+            value = value.encode('utf-8', 'ignore').decode('utf-8')
+    if isinstance(value, (np.bytes_, six.binary_type)):
+        value = value.decode(encoding='utf-8')
+    elif not isinstance(value, (np.str_, six.string_types)):
+        value = str(value)
+    return value
 
 
 class StrCategoryConverter(units.ConversionInterface):
     @staticmethod
     def convert(value, unit, axis):
-        """Uses axis.unit_data map to encode
-        data as floats
+        """Uses axis.units to encode string data as floats
+
+        Parameters
+        ----------
+        value: string, iterable
+            value or list of values to plot
+        unit:
+        axis:
         """
-        value = np.atleast_1d(value)
-        # try and update from here....
-        if hasattr(axis.unit_data, 'update'):
-            for val in value:
-                if isinstance(val, six.string_types):
-                    axis.unit_data.update(val)
-        vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs))
+        # dtype = object preserves numerical pass throughs
+        values = np.atleast_1d(np.array(value, dtype=object))
 
-        if isinstance(value, six.string_types):
-            return vmap[value]
+        # pass through sequence of non binary numbers
+        if all((units.ConversionInterface.is_numlike(v) and
+                not isinstance(v, VALID_TYPES)) for v in values):
+            return np.asarray(values, dtype=float)
 
-        vals = shim_array(value)
+        # force an update so it also does type checking
+        axis.units.update(values)
 
-        for lab, loc in vmap.items():
-            vals[vals == lab] = loc
+        str2idx = np.vectorize(axis.units._mapping.__getitem__,
+                               otypes=[float])
 
-        return vals.astype('float')
+        mapped_value = str2idx(values)
+        return mapped_value
 
     @staticmethod
     def axisinfo(unit, axis):
-        majloc = StrCategoryLocator(axis.unit_data.locs)
-        majfmt = StrCategoryFormatter(axis.unit_data.seq)
+        """Sets the axis ticks and labels
+        """
+        # locator and formatter take mapping dict because
+        # args need to be passed by reference for updates
+        majloc = StrCategoryLocator(axis.units)
+        majfmt = StrCategoryFormatter(axis.units)
         return units.AxisInfo(majloc=majloc, majfmt=majfmt)
 
     @staticmethod
-    def default_units(data, axis):
-        # the conversion call stack is:
+    def default_units(data=None, axis=None):
+        # the conversion call stack is supposed to be
         # default_units->axis_info->convert
-        if axis.unit_data is None:
-            axis.unit_data = UnitData(data)
+        if axis.units is None:
+            axis.set_units(UnitData(data))
         else:
-            axis.unit_data.update(data)
-        return None
+            axis.units.update(data)
+        return axis.units
 
 
-class StrCategoryLocator(ticker.FixedLocator):
-    def __init__(self, locs):
-        self.locs = locs
-        self.nbins = None
+class StrCategoryLocator(ticker.Locator):
+    """tick at every integer mapping of the string data"""
+    def __init__(self, units):
+        """
+        Parameters
+        -----------
+        units: dict
+              (string, integer) mapping
+        """
+        self._units = units
 
+    def __call__(self):
+        return list(self._units._mapping.values())
 
-class StrCategoryFormatter(ticker.FixedFormatter):
-    def __init__(self, seq):
-        self.seq = seq
-        self.offset_string = ''
+    def tick_values(self, vmin, vmax):
+        return self()
 
 
-class UnitData(object):
-    # debatable makes sense to special code missing values
-    spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}
+class StrCategoryFormatter(ticker.Formatter):
+    """String representation of the data at every tick"""
+    def __init__(self, units):
+        """
+        Parameters
+        ----------
+        units: dict
+              (string, integer) mapping
+        """
+        self._units = units
 
-    def __init__(self, data):
-        """Create mapping between unique categorical values
-        and numerical identifier
+    def __call__(self, x, pos=None):
+        if pos is None:
+            return ""
+        r_mapping = {v: to_str(k) for k, v in self._units._mapping.items()}
+        return r_mapping.get(int(np.round(x)), '')
 
-        Parameters
+
+class UnitData(object):
+    def __init__(self, data=None):
+        """Create mapping between unique categorical values
+        and integer identifiers
         ----------
         data: iterable
-            sequence of values
+              sequence of string values
+        """
+        if data is None:
+            data = ()
+        self._mapping = OrderedDict()
+        self._counter = itertools.count(start=0)
+        self.update(data)
+
+    def update(self, data):
+        """Maps new values to integer identifiers.
+
+        Parameters
+        ----------
+        data: iterable
+              sequence of string values
+
+        Raises
+        ------
+        TypeError
+              If the value in data is not a string, unicode, bytes type
         """
-        self.seq, self.locs = [], []
-        self._set_seq_locs(data, 0)
-
-    def update(self, new_data):
-        # so as not to conflict with spdict
-        value = max(max(self.locs) + 1, 0)
-        self._set_seq_locs(new_data, value)
-
-    def _set_seq_locs(self, data, value):
-        strdata = shim_array(data)
-        new_s = [d for d in np.unique(strdata) if d not in self.seq]
-        for ns in new_s:
-            self.seq.append(ns)
-            if ns in UnitData.spdict:
-                self.locs.append(UnitData.spdict[ns])
-            else:
-                self.locs.append(value)
-                value += 1
+
+        if (isinstance(data, VALID_TYPES) or
+                not isinstance(data, Iterable)):
+            data = [data]
+
+        unsorted_unique = OrderedDict.fromkeys(data)
+        for val in unsorted_unique:
+            if not isinstance(val, VALID_TYPES):
+                raise TypeError("{val!r} is not a string".format(val=val))
+            if val not in self._mapping:
+                self._mapping[val] = next(self._counter)
 
 
 # Connects the convertor to matplotlib
diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py
index c82cd5223dca..7e26c0f08e4b 100644
--- a/lib/matplotlib/tests/test_axes.py
+++ b/lib/matplotlib/tests/test_axes.py
@@ -1592,6 +1592,14 @@ def test_hist_unequal_bins_density():
     assert_allclose(mpl_heights, np_heights)
 
 
+def test_hist_datetime_datasets():
+    data = [[datetime.datetime(2017, 1, 1), datetime.datetime(2017, 1, 1)],
+            [datetime.datetime(2017, 1, 1), datetime.datetime(2017, 1, 2)]]
+    fig, ax = plt.subplots()
+    ax.hist(data, stacked=True)
+    ax.hist(data, stacked=False)
+
+
 def contour_dat():
     x = np.linspace(-3, 5, 150)
     y = np.linspace(-3, 5, 120)
diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py
index f0d4eeddf138..80465109bc10 100644
--- a/lib/matplotlib/tests/test_category.py
+++ b/lib/matplotlib/tests/test_category.py
@@ -3,59 +3,64 @@
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
 
-import six
 import pytest
 import numpy as np
 
-
+from matplotlib.axes import Axes
 import matplotlib.pyplot as plt
 import matplotlib.category as cat
 
 
-
 class TestUnitData(object):
-    test_cases = {'single': ("hello world", ["hello world"], [0]),
-                  'unicode': ("Здравствуйте мир", ["Здравствуйте мир"], [0]),
-                  'mixed': (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf],
-                       ['-inf', '3.14', 'A', 'B', 'inf', 'nan'],
-                       [-3.0, 0, 1, 2, -2.0, -1.0])}
- 
-    ids, data = zip(*six.iteritems(test_cases))
-
-    @pytest.mark.parametrize("data, seq, locs", data, ids=ids)
-    def test_unit(self, data, seq, locs):
-        act = cat.UnitData(data)
-        assert act.seq == seq
-        assert act.locs == locs
-
-    def test_update_map(self):
+    test_cases = [('single', (["hello world"], [0])),
+                  ('unicode', (["Здравствуйте мир"], [0])),
+                  ('mixed', (['A', "np.nan", 'B', "3.14", "мир"],
+                             [0, 1, 2, 3, 4]))]
+
+    ids, data = zip(*test_cases)
+
+    @pytest.mark.parametrize("data, locs", data, ids=ids)
+    def test_unit(self, data, locs):
+        unit = cat.UnitData(data)
+        assert list(unit._mapping.keys()) == data
+        assert list(unit._mapping.values()) == locs
+
+    def test_update(self):
         data = ['a', 'd']
-        oseq = ['a', 'd']
-        olocs = [0, 1]
+        locs = [0, 1]
 
-        data_update = ['b', 'd', 'e', np.inf]
-        useq = ['a', 'd', 'b', 'e', 'inf']
-        ulocs = [0, 1, 2, 3, -2]
+        data_update = ['b', 'd', 'e']
+        unique_data = ['a', 'd', 'b', 'e']
+        updated_locs = [0, 1, 2, 3]
 
-        unitdata = cat.UnitData(data)
-        assert unitdata.seq == oseq
-        assert unitdata.locs == olocs
+        unit = cat.UnitData(data)
+        assert list(unit._mapping.keys()) == data
+        assert list(unit._mapping.values()) == locs
 
-        unitdata.update(data_update)
-        assert unitdata.seq == useq
-        assert unitdata.locs == ulocs
+        unit.update(data_update)
+        assert list(unit._mapping.keys()) == unique_data
+        assert list(unit._mapping.values()) == updated_locs
 
+    failing_test_cases = [("number", 3.14), ("nan", np.nan),
+                          ("list", [3.14, 12]), ("mixed type", ["A", 2])]
 
-class FakeAxis(object):
-    def __init__(self, unit_data):
-        self.unit_data = unit_data
+    fids, fdata = zip(*failing_test_cases)
 
+    @pytest.mark.parametrize("fdata", fdata, ids=fids)
+    def test_non_string_fails(self, fdata):
+        with pytest.raises(TypeError):
+            cat.UnitData(fdata)
 
-class MockUnitData(object):
-    def __init__(self, data):
-        seq, locs = zip(*data)
-        self.seq = list(seq)
-        self.locs = list(locs)
+    @pytest.mark.parametrize("fdata", fdata, ids=fids)
+    def test_non_string_update_fails(self, fdata):
+        unitdata = cat.UnitData()
+        with pytest.raises(TypeError):
+            unitdata.update(fdata)
+
+
+class FakeAxis(object):
+    def __init__(self, units):
+        self.units = units
 
 
 class TestStrCategoryConverter(object):
@@ -65,193 +70,208 @@ class TestStrCategoryConverter(object):
          /pandas/tests/test_algos.py:TestFactorize
     """
 
-    test_cases = {"unicode": ("Здравствуйте мир", [("Здравствуйте мир", 42)], 42),
-                  "ascii" : ("hello world", [("hello world", 42)], 42),
-                  "single" : (['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
-                          [('a', 0), ('b', 1), ('c', 2)],
-                            [0, 1, 1, 0, 0, 2, 2, 2]),
-                  "mixed": (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf],
-                      [('nan', -1), ('3.14', 0), ('A', 1), ('B', 2),
-                        ('-inf', 100), ('inf', 200)], 
-                        [1, 1, -1, 2, 100, 0, 200]),
-                 "integer string": (["!", "0"], [("!", 0), ("0", 1)], [0, 1]),
-                 "number": (0.0, [(0.0, 0.0)], 0.0)}
+    test_cases = [("unicode", ["Здравствуйте мир"]),
+                  ("ascii", ["hello world"]),
+                  ("single", ['a', 'b', 'c']),
+                  ("integer string", ["1", "2"]),
+                  ("single + values>10", ["A", "B", "C", "D", "E", "F", "G",
+                                          "H", "I", "J", "K", "L", "M", "N",
+                                          "O", "P", "Q", "R", "S", "T", "U",
+                                          "V", "W", "X", "Y", "Z"])]
+
+    ids, values = zip(*test_cases)
 
-    ids, data = zip(*six.iteritems(test_cases))
+    failing_test_cases = [("mixed", [3.14, 'A', np.inf]),
+                          ("string integer", ['42', 42])]
+
+    fids, fvalues = zip(*failing_test_cases)
 
     @pytest.fixture(autouse=True)
     def mock_axis(self, request):
         self.cc = cat.StrCategoryConverter()
+        # self.unit should probably be replaced with a real mock unit
+        self.unit = cat.UnitData()
+        self.ax = FakeAxis(self.unit)
 
-    @pytest.mark.parametrize("data, unitmap, exp", data, ids=ids)
-    def test_convert(self, data, unitmap, exp):
-        MUD = MockUnitData(unitmap)
-        axis = FakeAxis(MUD)
-        act = self.cc.convert(data, None, axis)
-        np.testing.assert_array_equal(act, exp)
-
-    def test_axisinfo(self):
-        MUD = MockUnitData([(None, None)])
-        axis = FakeAxis(MUD)
-        ax = self.cc.axisinfo(None, axis)
-        assert isinstance(ax.majloc, cat.StrCategoryLocator)
-        assert isinstance(ax.majfmt, cat.StrCategoryFormatter)
-
-    def test_default_units(self):
-        axis = FakeAxis(None)
-        assert self.cc.default_units(["a"], axis) is None
-
-
-class TestStrCategoryLocator(object):
-    def test_StrCategoryLocator(self):
-        locs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-        ticks = cat.StrCategoryLocator(locs)
-        np.testing.assert_array_equal(ticks.tick_values(None, None), locs)
-
-
-class TestStrCategoryFormatter(object):
-    def test_StrCategoryFormatter(self):
-        seq = ["hello", "world", "hi"]
-        labels = cat.StrCategoryFormatter(seq)
-        assert labels('a', 1) == "world"
-
-    def test_StrCategoryFormatterUnicode(self):
-        seq = ["Здравствуйте", "привет"]
-        labels = cat.StrCategoryFormatter(seq)
-        assert labels('a', 1) == "привет"
+    @pytest.mark.parametrize("vals", values, ids=ids)
+    def test_convert(self, vals):
+        np.testing.assert_allclose(self.cc.convert(vals, None, self.ax),
+                                   range(len(vals)))
 
+    @pytest.mark.parametrize("value", ["hi", "мир"], ids=["ascii", "unicode"])
+    def test_convert_one_string(self, value):
+        assert self.cc.convert(value, None, self.ax) == 0
 
-def lt(tl):
-    return [l.get_text() for l in tl]
+    def test_convert_one_number(self):
+        actual = self.cc.convert(0.0, None, self.ax)
+        np.testing.assert_allclose(actual, np.array([0.]))
 
+    def test_convert_float_array(self):
+        data = np.array([1, 2, 3], dtype=float)
+        actual = self.cc.convert(data, None, self.ax)
+        np.testing.assert_allclose(actual, np.array([1., 2., 3.]))
 
-class TestPlot(object):
-    bytes_cases = {'string list': ['a', 'b', 'c'],
-                    'bytes list': [b'a', b'b', b'c'],
-                    'bytes ndarray' : np.array([b'a', b'b', b'c'])}
+    @pytest.mark.parametrize("fvals", fvalues, ids=fids)
+    def test_convert_fail(self, fvals):
+        with pytest.raises(TypeError):
+            self.cc.convert(fvals, None, self.ax)
 
-    bytes_ids, bytes_data = zip(*six.iteritems(bytes_cases))
-
-    numlike_cases = {'string list': ['1', '11', '3'],
-                     'string ndarray' : np.array(['1', '11', '3']),
-                     'bytes list' : [b'1', b'11', b'3'],
-                     'bytes ndarray' : np.array([b'1', b'11', b'3'])}
-
-    numlike_ids, numlike_data = zip(*six.iteritems(numlike_cases))
-
-    @pytest.fixture
-    def data(self):
-        self.d = ['a', 'b', 'c', 'a']
-        self.dticks = [0, 1, 2]
-        self.dlabels = ['a', 'b', 'c']
-        unitmap = [('a', 0), ('b', 1), ('c', 2)]
-        self.dunit_data = MockUnitData(unitmap)
-
-    @pytest.fixture
-    def missing_data(self):
-        self.dm = ['here', np.nan, 'here', 'there']
-        self.dmticks = [0, -1, 1]
-        self.dmlabels = ['here', 'nan', 'there']
-        unitmap = [('here', 0), ('nan', -1), ('there', 1)]
-        self.dmunit_data = MockUnitData(unitmap)
-
-    def axis_test(self, axis, ticks, labels, unit_data):
-        np.testing.assert_array_equal(axis.get_majorticklocs(), ticks)
-        assert lt(axis.get_majorticklabels()) == labels
-        np.testing.assert_array_equal(axis.unit_data.locs, unit_data.locs)
-        assert axis.unit_data.seq == unit_data.seq
-
-    def test_plot_unicode(self):
-        words = ['Здравствуйте', 'привет']
-        locs = [0.0, 1.0]
-        unit_data = MockUnitData(zip(words, locs))
+    def test_axisinfo(self):
+        axis = self.cc.axisinfo(None, self.ax)
+        assert isinstance(axis.majloc, cat.StrCategoryLocator)
+        assert isinstance(axis.majfmt, cat.StrCategoryFormatter)
 
-        fig, ax = plt.subplots()
-        ax.plot(words)
-        fig.canvas.draw()
+    def test_default_units(self):
+        assert isinstance(self.cc.default_units(["a"], self.ax), cat.UnitData)
 
-        self.axis_test(ax.yaxis, locs, words, unit_data)
 
-    @pytest.mark.usefixtures("data")
-    def test_plot_1d(self):
-        fig, ax = plt.subplots()
-        ax.plot(self.d)
-        fig.canvas.draw()
+@pytest.fixture
+def ax():
+    return plt.figure().subplots()
 
-        self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data)
 
-    @pytest.mark.usefixtures("missing_data")
-    def test_plot_1d_missing(self):
-        fig, ax = plt.subplots()
-        ax.plot(self.dm)
-        fig.canvas.draw()
+PLOT_LIST = [Axes.scatter, Axes.plot, Axes.bar]
+PLOT_IDS = ["scatter", "plot", "bar"]
 
-        self.axis_test(ax.yaxis, self.dmticks, self.dmlabels, self.dmunit_data)
 
-    @pytest.mark.usefixtures("data")
-    @pytest.mark.parametrize("bars", bytes_data, ids=bytes_ids)
-    def test_plot_bytes(self, bars):
-        counts = np.array([4, 6, 5])
+class TestStrCategoryLocator(object):
+    def test_StrCategoryLocator(self):
+        locs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        unit = cat.UnitData([str(j) for j in locs])
+        ticks = cat.StrCategoryLocator(unit)
+        np.testing.assert_array_equal(ticks.tick_values(None, None), locs)
 
-        fig, ax = plt.subplots()
-        ax.bar(bars, counts)
-        fig.canvas.draw()
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_StrCategoryLocatorPlot(self, ax, plotter):
+        ax.plot(["a", "b", "c"])
+        np.testing.assert_array_equal(ax.yaxis.major.locator(), range(3))
 
-        self.axis_test(ax.xaxis, self.dticks, self.dlabels, self.dunit_data)
 
-    @pytest.mark.parametrize("bars", numlike_data, ids=numlike_ids)
-    def test_plot_numlike(self, bars):
+class TestStrCategoryFormatter(object):
+    test_cases = [("ascii", ["hello", "world", "hi"]),
+                  ("unicode", ["Здравствуйте", "привет"])]
+
+    ids, cases = zip(*test_cases)
+
+    @pytest.mark.parametrize("ydata", cases, ids=ids)
+    def test_StrCategoryFormatter(self, ax, ydata):
+        unit = cat.UnitData(ydata)
+        labels = cat.StrCategoryFormatter(unit)
+        for i, d in enumerate(ydata):
+            assert labels(i, i) == d
+
+    @pytest.mark.parametrize("ydata", cases, ids=ids)
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_StrCategoryFormatterPlot(self, ax, ydata, plotter):
+        plotter(ax, range(len(ydata)), ydata)
+        for i, d in enumerate(ydata):
+            assert ax.yaxis.major.formatter(i, i) == d
+        assert ax.yaxis.major.formatter(i+1, i+1) == ""
+        assert ax.yaxis.major.formatter(0, None) == ""
+
+
+def axis_test(axis, labels):
+    ticks = list(range(len(labels)))
+    np.testing.assert_array_equal(axis.get_majorticklocs(), ticks)
+    graph_labels = [axis.major.formatter(i, i) for i in ticks]
+    assert graph_labels == [cat.to_str(l) for l in labels]
+    assert list(axis.units._mapping.keys()) == [l for l in labels]
+    assert list(axis.units._mapping.values()) == ticks
+
+
+class TestPlotBytes(object):
+    bytes_cases = [('string list', ['a', 'b', 'c']),
+                   ('bytes list', [b'a', b'b', b'c']),
+                   ('bytes ndarray', np.array([b'a', b'b', b'c']))]
+
+    bytes_ids, bytes_data = zip(*bytes_cases)
+
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    @pytest.mark.parametrize("bdata", bytes_data, ids=bytes_ids)
+    def test_plot_bytes(self, ax, plotter, bdata):
         counts = np.array([4, 6, 5])
+        plotter(ax, bdata, counts)
+        axis_test(ax.xaxis, bdata)
 
-        fig, ax = plt.subplots()
-        ax.bar(bars, counts)
-        fig.canvas.draw()
-
-        unitmap = MockUnitData([('1', 0), ('11', 1), ('3', 2)])
-        self.axis_test(ax.xaxis, [0, 1, 2], ['1', '11', '3'], unitmap)
-
-    @pytest.mark.usefixtures("data", "missing_data")
-    def test_plot_2d(self):
-        fig, ax = plt.subplots()
-        ax.plot(self.dm, self.d)
-        fig.canvas.draw()
-
-        self.axis_test(ax.xaxis, self.dmticks, self.dmlabels, self.dmunit_data)
-        self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data)
-
-    @pytest.mark.usefixtures("data", "missing_data")
-    def test_scatter_2d(self):
 
-        fig, ax = plt.subplots()
-        ax.scatter(self.dm, self.d)
-        fig.canvas.draw()
+class TestPlotNumlike(object):
+    numlike_cases = [('string list', ['1', '11', '3']),
+                     ('string ndarray', np.array(['1', '11', '3'])),
+                     ('bytes list', [b'1', b'11', b'3']),
+                     ('bytes ndarray', np.array([b'1', b'11', b'3']))]
 
-        self.axis_test(ax.xaxis, self.dmticks, self.dmlabels, self.dmunit_data)
-        self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data)
+    numlike_ids, numlike_data = zip(*numlike_cases)
 
-    def test_plot_update(self):
-        fig, ax = plt.subplots()
-
-        ax.plot(['a', 'b'])
-        ax.plot(['a', 'b', 'd'])
-        ax.plot(['b', 'c', 'd'])
-        fig.canvas.draw()
-
-        labels = ['a', 'b', 'd', 'c']
-        ticks = [0, 1, 2, 3]
-        unit_data = MockUnitData(list(zip(labels, ticks)))
-
-        self.axis_test(ax.yaxis, ticks, labels, unit_data)
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    @pytest.mark.parametrize("ndata", numlike_data, ids=numlike_ids)
+    def test_plot_numlike(self, ax, plotter, ndata):
+        counts = np.array([4, 6, 5])
+        plotter(ax, ndata, counts)
+        axis_test(ax.xaxis, ndata)
 
-    def test_scatter_update(self):
-        fig, ax = plt.subplots()
 
-        ax.scatter(['a', 'b'], [0., 3.])
-        ax.scatter(['a', 'b', 'd'], [1., 2., 3.])
-        ax.scatter(['b', 'c', 'd'], [4., 1., 2.])
-        fig.canvas.draw()
+class TestPlotTypes(object):
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_plot_unicode(self, ax, plotter):
+        words = ['Здравствуйте', 'привет']
+        plotter(ax, words, [0, 1])
+        axis_test(ax.xaxis, words)
 
-        labels = ['a', 'b', 'd', 'c']
-        ticks = [0, 1, 2, 3]
-        unit_data = MockUnitData(list(zip(labels, ticks)))
-        self.axis_test(ax.xaxis, ticks, labels, unit_data)
+    @pytest.fixture
+    def test_data(self):
+        self.x = ["hello", "happy", "world"]
+        self.xy = [2, 6, 3]
+        self.y = ["Python", "is", "fun"]
+        self.yx = [3, 4, 5]
+
+    @pytest.mark.usefixtures("test_data")
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_plot_xaxis(self, ax, test_data, plotter):
+        plotter(ax, self.x, self.xy)
+        axis_test(ax.xaxis, self.x)
+
+    @pytest.mark.usefixtures("test_data")
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_plot_yaxis(self, ax, test_data, plotter):
+        plotter(ax, self.yx, self.y)
+        axis_test(ax.yaxis, self.y)
+
+    @pytest.mark.usefixtures("test_data")
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_plot_xyaxis(self, ax, test_data, plotter):
+        plotter(ax, self.x, self.y)
+        axis_test(ax.xaxis, self.x)
+        axis_test(ax.yaxis, self.y)
+
+    @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
+    def test_update_plot(self, ax, plotter):
+        plotter(ax, ['a', 'b'], ['e', 'g'])
+        plotter(ax, ['a', 'b', 'd'], ['f', 'a', 'b'])
+        plotter(ax, ['b', 'c', 'd'], ['g', 'e', 'd'])
+        axis_test(ax.xaxis, ['a', 'b', 'd', 'c'])
+        axis_test(ax.yaxis, ['e', 'g', 'f', 'a', 'b', 'd'])
+
+    failing_test_cases = [("mixed", ['A', 3.14]),
+                          ("number integer", ['1', 1]),
+                          ("string integer", ['42', 42]),
+                          ("missing", ['12', np.nan])]
+
+    fids, fvalues = zip(*failing_test_cases)
+
+    PLOT_BROKEN_LIST = [Axes.scatter,
+                        pytest.param(Axes.plot, marks=pytest.mark.xfail),
+                        pytest.param(Axes.bar, marks=pytest.mark.xfail)]
+    PLOT_BROKEN_IDS = ["scatter", "plot", "bar"]
+
+    @pytest.mark.parametrize("plotter", PLOT_BROKEN_LIST, ids=PLOT_BROKEN_IDS)
+    @pytest.mark.parametrize("xdata", fvalues, ids=fids)
+    def test_plot_failures(self, ax, plotter, xdata):
+        with pytest.raises(TypeError):
+            plotter(ax, xdata, [1, 2])
+
+    @pytest.mark.parametrize("plotter", PLOT_BROKEN_LIST, ids=PLOT_BROKEN_IDS)
+    @pytest.mark.parametrize("xdata", fvalues, ids=fids)
+    def test_plot_failures_update(self, ax, plotter, xdata):
+        with pytest.raises(TypeError):
+            plotter(ax, [0, 3], [1, 3])
+            plotter(ax, xdata, [1, 2])

From 4d57690bbf478b91c34dd7aa58f75d4a7def5782 Mon Sep 17 00:00:00 2001
From: hannah <story645@gmail.com>
Date: Thu, 8 Feb 2018 00:30:40 -0500
Subject: [PATCH 3/5] addressing documentation comments + more use of units

---
 .appveyor.yml                              |   2 +-
 .travis.yml                                |   2 +-
 doc/api/next_api_changes/2018-02-10-HA.rst |  10 ++
 lib/matplotlib/axes/_axes.py               |   2 +-
 lib/matplotlib/axis.py                     |  11 +-
 lib/matplotlib/category.py                 | 123 +++++++++++++--------
 lib/matplotlib/tests/test_category.py      |  36 +++---
 7 files changed, 118 insertions(+), 68 deletions(-)
 create mode 100644 doc/api/next_api_changes/2018-02-10-HA.rst

diff --git a/.appveyor.yml b/.appveyor.yml
index c8d6e22627f3..afd1faa72756 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -66,7 +66,7 @@ install:
   - activate test-environment
   - echo %PYTHON_VERSION% %TARGET_ARCH%
   # pytest-cov>=2.3.1 due to https://github.com/pytest-dev/pytest-cov/issues/124
-  - pip install -q "pytest!=3.3.0" "pytest-cov>=2.3.1" pytest-rerunfailures pytest-timeout pytest-xdist
+  - pip install -q "pytest!=3.3.0,>=3.2.0" "pytest-cov>=2.3.1" pytest-rerunfailures pytest-timeout pytest-xdist
 
   # Apply patch to `subprocess` on Python versions > 2 and < 3.6.3
   # https://github.com/matplotlib/matplotlib/issues/9176
diff --git a/.travis.yml b/.travis.yml
index b85cf4e14797..4973ace7027b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,7 +52,7 @@ env:
     - NUMPY=numpy
     - PANDAS=
     - PYPARSING=pyparsing
-    - PYTEST=pytest!=3.3.0
+    - PYTEST='pytest!=3.3.0,>=3.2.0'
     - PYTEST_COV=pytest-cov
     - PYTEST_PEP8=
     - SPHINX=sphinx
diff --git a/doc/api/next_api_changes/2018-02-10-HA.rst b/doc/api/next_api_changes/2018-02-10-HA.rst
new file mode 100644
index 000000000000..6483d8c8345b
--- /dev/null
+++ b/doc/api/next_api_changes/2018-02-10-HA.rst
@@ -0,0 +1,10 @@
+Deprecated `Axis.unt_data`
+``````````````````````````
+
+Use `Axis.units` (which has long existed) instead.
+
+Only accept string-like for Categorical input
+`````````````````````````````````````````````
+
+Do not accept mixed string / float / int input; only
+strings are valid categoricals.
diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py
index f028a95df663..a09a76bd7fb5 100644
--- a/lib/matplotlib/axes/_axes.py
+++ b/lib/matplotlib/axes/_axes.py
@@ -6442,7 +6442,7 @@ def hist(self, x, bins=None, range=None, density=None, weights=None,
         if normed is not None:
             warnings.warn("The 'normed' kwarg is deprecated, and has been "
                           "replaced by the 'density' kwarg.")
-            
+
         # basic input validation
         input_empty = np.size(x) == 0
         # Massage 'x' for processing.
diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py
index f113b095d334..70ec488673cc 100644
--- a/lib/matplotlib/axis.py
+++ b/lib/matplotlib/axis.py
@@ -720,9 +720,6 @@ def __init__(self, axes, pickradius=15):
         self.labelpad = rcParams['axes.labelpad']
         self.offsetText = self._get_offset_text()
 
-        self.majorTicks = []
-        self.minorTicks = []
-
         self.pickradius = pickradius
 
         # Initialize here for testing; later add API
@@ -780,14 +777,14 @@ def limit_range_for_scale(self, vmin, vmax):
         return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos())
 
     @property
-    @cbook.deprecated("2.1.1")
+    @cbook.deprecated("2.2.0")
     def unit_data(self):
-        return self._units
+        return self.units
 
     @unit_data.setter
-    @cbook.deprecated("2.1.1")
+    @cbook.deprecated("2.2.0")
     def unit_data(self, unit_data):
-        self.set_units = unit_data
+        self.set_units(unit_data)
 
     def get_children(self):
         children = [self.label, self.offsetText]
diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py
index 1dd7c8aa469d..326d817df1ba 100644
--- a/lib/matplotlib/category.py
+++ b/lib/matplotlib/category.py
@@ -1,15 +1,19 @@
 # -*- coding: utf-8 -*-
 """
-catch all for categorical functions
+StrCategorical module for facilitating natively plotting String/Text data.
+This module contains the conversion mechanism (a monotonic mapping from
+strings to integers), tick locator and formatter, and the class:`.UnitData`
+object that creates and stores the string to integer mapping.
 """
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
 
-from collections import Iterable, OrderedDict
+from collections import OrderedDict
 import itertools
 
 import six
 
+
 import numpy as np
 
 import matplotlib.units as units
@@ -22,31 +26,26 @@
                         (bytes, six.text_type, np.str_, np.bytes_)))
 
 
-def to_str(value):
-    """Helper function to turn values to strings.
-    """
-    # Note: This function is only used by StrCategoryFormatter
-    if LooseVersion(np.__version__) < LooseVersion('1.7.0'):
-        if (isinstance(value, (six.text_type, np.unicode))):
-            value = value.encode('utf-8', 'ignore').decode('utf-8')
-    if isinstance(value, (np.bytes_, six.binary_type)):
-        value = value.decode(encoding='utf-8')
-    elif not isinstance(value, (np.str_, six.string_types)):
-        value = str(value)
-    return value
-
-
 class StrCategoryConverter(units.ConversionInterface):
     @staticmethod
     def convert(value, unit, axis):
-        """Uses axis.units to encode string data as floats
+        """Converts strings in value to floats using
+        mapping information stored in the unit object
 
         Parameters
         ----------
-        value: string, iterable
-            value or list of values to plot
-        unit:
-        axis:
+        value : string or iterable
+            value or list of values to be converted
+        unit : :class:`.UnitData`
+            object storing string unit information for value
+        axis : :class:`~matplotlib.Axis.axis`
+            axis on which the converted value is plotted
+
+        Returns
+        -------
+        mapped_value : float or ndarray[float]
+
+        .. note:: axis is not used in this function
         """
         # dtype = object preserves numerical pass throughs
         values = np.atleast_1d(np.array(value, dtype=object))
@@ -57,9 +56,9 @@ def convert(value, unit, axis):
             return np.asarray(values, dtype=float)
 
         # force an update so it also does type checking
-        axis.units.update(values)
+        unit.update(values)
 
-        str2idx = np.vectorize(axis.units._mapping.__getitem__,
+        str2idx = np.vectorize(unit._mapping.__getitem__,
                                otypes=[float])
 
         mapped_value = str2idx(values)
@@ -67,16 +66,43 @@ def convert(value, unit, axis):
 
     @staticmethod
     def axisinfo(unit, axis):
-        """Sets the axis ticks and labels
+        """Sets the default axis ticks and labels
+
+        Parameters
+        ----------
+        unit : :class:`.UnitData`
+            object storing string unit information for value
+        axis : :class:`~matplotlib.Axis.axis`
+            axis for which information is being set
+
+        Returns
+        -------
+        :class:`~matplotlib.units.AxisInfo`
+            Information to support default tick labeling
+
+        .. note:: axis is not used
         """
         # locator and formatter take mapping dict because
         # args need to be pass by reference for updates
-        majloc = StrCategoryLocator(axis.units)
-        majfmt = StrCategoryFormatter(axis.units)
+        majloc = StrCategoryLocator(unit._mapping)
+        majfmt = StrCategoryFormatter(unit._mapping)
         return units.AxisInfo(majloc=majloc, majfmt=majfmt)
 
     @staticmethod
-    def default_units(data=None, axis=None):
+    def default_units(data, axis):
+        """ Sets and updates the :class:`~matplotlib.Axis.axis` units
+
+        Parameters
+        ----------
+        data : string or iterable of strings
+        axis : :class:`~matplotlib.Axis.axis`
+            axis on which the data is plotted
+
+        Returns
+        -------
+        :class:`.UnitData`
+            object storing string to integer mapping
+        """
         # the conversion call stack is supposed to be
         # default_units->axis_info->convert
         if axis.units is None:
@@ -88,17 +114,17 @@ def default_units(data=None, axis=None):
 
 class StrCategoryLocator(ticker.Locator):
     """tick at every integer mapping of the string data"""
-    def __init__(self, units):
+    def __init__(self, units_mapping):
         """
         Parameters
         -----------
         units: dict
-              (string, integer) mapping
+             string:integer mapping
         """
-        self._units = units
+        self._units = units_mapping
 
     def __call__(self):
-        return list(self._units._mapping.values())
+        return list(self._units.values())
 
     def tick_values(self, vmin, vmax):
         return self()
@@ -106,21 +132,35 @@ def tick_values(self, vmin, vmax):
 
 class StrCategoryFormatter(ticker.Formatter):
     """String representation of the data at every tick"""
-    def __init__(self, units):
+    def __init__(self, units_mapping):
         """
         Parameters
         ----------
         units: dict
-              (string, integer) mapping
+            string:integer mapping
         """
-        self._units = units
+        self._units = units_mapping
 
     def __call__(self, x, pos=None):
         if pos is None:
             return ""
-        r_mapping = {v: to_str(k) for k, v in self._units._mapping.items()}
+        r_mapping = {v: StrCategoryFormatter._text(k)
+                     for k, v in self._units.items()}
         return r_mapping.get(int(np.round(x)), '')
 
+    @staticmethod
+    def _text(value):
+        """Converts text values into `utf-8` or `ascii` strings
+        """
+        if LooseVersion(np.__version__) < LooseVersion('1.7.0'):
+            if (isinstance(value, (six.text_type, np.unicode))):
+                value = value.encode('utf-8', 'ignore').decode('utf-8')
+        if isinstance(value, (np.bytes_, six.binary_type)):
+            value = value.decode(encoding='utf-8')
+        elif not isinstance(value, (np.str_, six.string_types)):
+            value = str(value)
+        return value
+
 
 class UnitData(object):
     def __init__(self, data=None):
@@ -130,11 +170,10 @@ def __init__(self, data=None):
         data: iterable
               sequence of string values
         """
-        if data is None:
-            data = ()
         self._mapping = OrderedDict()
         self._counter = itertools.count(start=0)
-        self.update(data)
+        if data is not None:
+            self.update(data)
 
     def update(self, data):
         """Maps new values to integer identifiers.
@@ -149,13 +188,9 @@ def update(self, data):
         TypeError
               If the value in data is not a string, unicode, bytes type
         """
+        data = np.atleast_1d(np.array(data, dtype=object))
 
-        if (isinstance(data, VALID_TYPES) or
-                not isinstance(data, Iterable)):
-            data = [data]
-
-        unsorted_unique = OrderedDict.fromkeys(data)
-        for val in unsorted_unique:
+        for val in OrderedDict.fromkeys(data):
             if not isinstance(val, VALID_TYPES):
                 raise TypeError("{val!r} is not a string".format(val=val))
             if val not in self._mapping:
diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py
index de8eb49d7a75..40f9d078ec5e 100644
--- a/lib/matplotlib/tests/test_category.py
+++ b/lib/matplotlib/tests/test_category.py
@@ -9,6 +9,9 @@
 import matplotlib.pyplot as plt
 import matplotlib.category as cat
 
+# Python2/3 text handling
+_to_str = cat.StrCategoryFormatter._text
+
 
 class TestUnitData(object):
     test_cases = [('single', (["hello world"], [0])),
@@ -86,53 +89,57 @@ class TestStrCategoryConverter(object):
     @pytest.fixture(autouse=True)
     def mock_axis(self, request):
         self.cc = cat.StrCategoryConverter()
-        # self.unit should be probably be  replaced with real mock unit
+        # self.unit should probably be replaced with a real mock unit
         self.unit = cat.UnitData()
         self.ax = FakeAxis(self.unit)
 
     @pytest.mark.parametrize("vals", values, ids=ids)
     def test_convert(self, vals):
-        np.testing.assert_allclose(self.cc.convert(vals, None, self.ax),
+        np.testing.assert_allclose(self.cc.convert(vals, self.ax.units,
+                                                   self.ax),
                                    range(len(vals)))
 
     @pytest.mark.parametrize("value", ["hi", "мир"], ids=["ascii", "unicode"])
     def test_convert_one_string(self, value):
-        assert self.cc.convert(value, None, self.ax) == 0
+        assert self.cc.convert(value, self.unit, self.ax) == 0
 
     def test_convert_one_number(self):
-        actual = self.cc.convert(0.0, None, self.ax)
+        actual = self.cc.convert(0.0, self.unit, self.ax)
         np.testing.assert_allclose(actual, np.array([0.]))
 
     def test_convert_float_array(self):
         data = np.array([1, 2, 3], dtype=float)
-        actual = self.cc.convert(data, None, self.ax)
+        actual = self.cc.convert(data, self.unit, self.ax)
         np.testing.assert_allclose(actual, np.array([1., 2., 3.]))
 
     @pytest.mark.parametrize("fvals", fvalues, ids=fids)
     def test_convert_fail(self, fvals):
         with pytest.raises(TypeError):
-            self.cc.convert(fvals, None, self.ax)
+            self.cc.convert(fvals, self.unit, self.ax)
 
     def test_axisinfo(self):
-        axis = self.cc.axisinfo(None, self.ax)
+        axis = self.cc.axisinfo(self.unit, self.ax)
         assert isinstance(axis.majloc, cat.StrCategoryLocator)
         assert isinstance(axis.majfmt, cat.StrCategoryFormatter)
 
     def test_default_units(self):
         assert isinstance(self.cc.default_units(["a"], self.ax), cat.UnitData)
 
+
 @pytest.fixture
 def ax():
     return plt.figure().subplots()
 
+
 PLOT_LIST = [Axes.scatter, Axes.plot, Axes.bar]
 PLOT_IDS = ["scatter", "plot", "bar"]
 
+
 class TestStrCategoryLocator(object):
     def test_StrCategoryLocator(self):
         locs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
         unit = cat.UnitData([str(j) for j in locs])
-        ticks = cat.StrCategoryLocator(unit)
+        ticks = cat.StrCategoryLocator(unit._mapping)
         np.testing.assert_array_equal(ticks.tick_values(None, None), locs)
 
     @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
@@ -150,16 +157,16 @@ class TestStrCategoryFormatter(object):
     @pytest.mark.parametrize("ydata", cases, ids=ids)
     def test_StrCategoryFormatter(self, ax, ydata):
         unit = cat.UnitData(ydata)
-        labels = cat.StrCategoryFormatter(unit)
+        labels = cat.StrCategoryFormatter(unit._mapping)
         for i, d in enumerate(ydata):
-            assert labels(i, i) == d
+            assert labels(i, i) == _to_str(d)
 
     @pytest.mark.parametrize("ydata", cases, ids=ids)
     @pytest.mark.parametrize("plotter", PLOT_LIST, ids=PLOT_IDS)
     def test_StrCategoryFormatterPlot(self, ax, ydata, plotter):
         plotter(ax, range(len(ydata)), ydata)
         for i, d in enumerate(ydata):
-            assert ax.yaxis.major.formatter(i, i) == d
+            assert ax.yaxis.major.formatter(i, i) == _to_str(d)
         assert ax.yaxis.major.formatter(i+1, i+1) == ""
         assert ax.yaxis.major.formatter(0, None) == ""
 
@@ -168,7 +175,7 @@ def axis_test(axis, labels):
     ticks = list(range(len(labels)))
     np.testing.assert_array_equal(axis.get_majorticklocs(), ticks)
     graph_labels = [axis.major.formatter(i, i) for i in ticks]
-    assert graph_labels == [cat.to_str(l) for l in labels]
+    assert graph_labels == [_to_str(l) for l in labels]
     assert list(axis.units._mapping.keys()) == [l for l in labels]
     assert list(axis.units._mapping.values()) == ticks
 
@@ -254,17 +261,18 @@ def test_update_plot(self, ax, plotter):
     PLOT_BROKEN_LIST = [Axes.scatter,
                         pytest.param(Axes.plot, marks=pytest.mark.xfail),
                         pytest.param(Axes.bar, marks=pytest.mark.xfail)]
+
     PLOT_BROKEN_IDS = ["scatter", "plot", "bar"]
 
     @pytest.mark.parametrize("plotter", PLOT_BROKEN_LIST, ids=PLOT_BROKEN_IDS)
     @pytest.mark.parametrize("xdata", fvalues, ids=fids)
-    def test_plot_failures(self, ax, plotter, xdata):
+    def test_mixed_type_exception(self, ax, plotter, xdata):
         with pytest.raises(TypeError):
             plotter(ax, xdata, [1, 2])
 
     @pytest.mark.parametrize("plotter", PLOT_BROKEN_LIST, ids=PLOT_BROKEN_IDS)
     @pytest.mark.parametrize("xdata", fvalues, ids=fids)
-    def test_plot_failures_update(self, ax, plotter, xdata):
+    def test_mixed_type_update_exception(self, ax, plotter, xdata):
         with pytest.raises(TypeError):
             plotter(ax, [0, 3], [1, 3])
             plotter(ax, xdata, [1, 2])

From 22e3a6692c68cb00e0e27591c21fd9eef3d3f7be Mon Sep 17 00:00:00 2001
From: Thomas A Caswell <tcaswell@gmail.com>
Date: Sun, 11 Feb 2018 10:04:19 -0500
Subject: [PATCH 4/5] DOC: fix typo

---
 doc/api/next_api_changes/2018-02-10-HA.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/api/next_api_changes/2018-02-10-HA.rst b/doc/api/next_api_changes/2018-02-10-HA.rst
index 6483d8c8345b..b735aa6cd6c0 100644
--- a/doc/api/next_api_changes/2018-02-10-HA.rst
+++ b/doc/api/next_api_changes/2018-02-10-HA.rst
@@ -1,5 +1,5 @@
-Deprecated `Axis.unt_data`
-``````````````````````````
+Deprecated `Axis.unit_data`
+```````````````````````````
 
 Use `Axis.units` (which has long existed) instead.
 

From c7d57f6557dbf7e3c6674f48333bf039377e0588 Mon Sep 17 00:00:00 2001
From: Jody Klymak <jklymak@gmail.com>
Date: Sun, 11 Feb 2018 13:39:56 -0800
Subject: [PATCH 5/5] Suggested pre-amble change

---
 lib/matplotlib/category.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py
index 326d817df1ba..dfbebf870c07 100644
--- a/lib/matplotlib/category.py
+++ b/lib/matplotlib/category.py
@@ -1,9 +1,15 @@
 # -*- coding: utf-8 -*-
 """
-StrCategorical module for facilitating natively plotting String/Text data.
-This module contains the conversion mechanism (a monotonic mapping from
-strings to integers), tick locator and formatter, and the class:`.UnitData`
-object that creates and stores the string to integer mapping.
+Module that allows plotting of string "category" data.  For example,
+``plot(['d', 'f', 'a'], [1, 2, 3])`` will plot three points with x-axis
+values of 'd', 'f', 'a'.
+
+See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an
+example.
+
+The module uses Matplotlib's `matplotlib.units` mechanism to convert from
+strings to integers, provides a tick locator and formatter, and the
+:class:`.UnitData` that creates and stores the string-to-integer mapping.
 """
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)