CategoricalNorm kinda sorta works, relies on StrCategorical

story645 · story645 · commit 7eb0e8c39226 · 2016-08-17T07:02:28.000-04:00
diff --git a/build_alllocal.cmd b/build_alllocal.cmd
diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py
@@ -20,7 +20,7 @@
 NP_NEW = (LooseVersion(np.version.version) >= LooseVersion('1.7'))
 
 
-def to_array(data, maxlen=100):
+def to_str_array(data, maxlen=100):
     if NP_NEW:
         return np.array(data, dtype=np.unicode)
     if cbook.is_scalar_or_string(data):
@@ -53,13 +53,13 @@ def convert(value, unit, axis):
         vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs))
 
         if isinstance(value, six.string_types):
-            return vmap[value]
+            return vmap.get(value, None)
 
-        vals = to_array(value)
+        vals = to_str_array(value)
         for lab, loc in vmap.items():
             vals[vals == lab] = loc
 
-        return vals.astype('float')
+        return vals.astype('float64')
 
     @staticmethod
     def axisinfo(unit, axis):
@@ -74,16 +74,20 @@ def axisinfo(unit, axis):
         return munits.AxisInfo(majloc=majloc, majfmt=majfmt)
 
     @staticmethod
-    def default_units(data, axis):
+    def default_units(data, axis, sort=True):
         """
         Create mapping between string categories in *data*
         and integers, then store in *axis.unit_data*
         """
-        if axis.unit_data is None:
-            axis.unit_data = UnitData(data)
-        else:
-            axis.unit_data.update(data)
-        return None
+
+        if axis and axis.unit_data:
+            axis.unit_data.update(data, sort)
+            return
+
+        unit_data = UnitData(data, sort)
+        if axis:
+            axis.unit_data = unit_data
+        return unit_data
 
 
 class StrCategoryLocator(mticker.FixedLocator):
@@ -115,30 +119,26 @@ def __init__(self, categories):
         *categories*
             distinct values for mapping
 
-        Out-of-range values are mapped to a value not in categories;
-        these are then converted to valid indices by :meth:`Colormap.__call__`.
+        Out-of-range values are mapped to np.nan
         """
-        self.categories = categories
+
+        self.unit_data = StrCategoryConverter.default_units(categories,
+                                                            None, sort=False)
+        self.categories = to_str_array(categories)
         self.N = len(self.categories)
-        self.vmin = 0
-        self.vmax = self.N
-        self._interp = False
+        self.nvals = self.unit_data.locs
+        self.vmin = min(self.nvals)
+        self.vmax = max(self.nvals)
 
     def __call__(self, value, clip=None):
-        if not cbook.iterable(value):
-            value = [value]
-
-        value = np.asarray(value)
-        ret = np.ones(value.shape) * np.nan
-
-        for i, c in enumerate(self.categories):
-            ret[value == c] = i / (self.N * 1.0)
-
-        return np.ma.array(ret, mask=np.isnan(ret))
-
-    def inverse(self, value):
-        # not quite sure what invertible means in this context
-        return ValueError("CategoryNorm is not invertible")
+        # TODO: will need to go into imshow and undo the casting it does
+        value = np.asarray(value, dtype=int)
+        ret = StrCategoryConverter.convert(value, None, self)
+        # knock out values not in the norm
+        mask = np.in1d(ret, self.unit_data.locs).reshape(ret.shape)
+        # normalize ret
+        ret /=  self.vmax
+        return np.ma.array(ret, mask=~mask)
 
 
 def colors_from_categories(codings):
@@ -187,27 +187,40 @@ class UnitData(object):
     # debatable makes sense to special code missing values
     spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}
 
-    def __init__(self, data):
+    def __init__(self, data, sort=True):
         """Create mapping between unique categorical values
         and numerical identifier
         Paramters
         ---------
         data: iterable
             sequence of values
+        sort: bool
+            if True (default), the unique values are sorted;
+            if False, the input order is preserved
         """
         self.seq, self.locs = [], []
-        self._set_seq_locs(data, 0)
+        self._set_seq_locs(data, 0, sort)
+        self.sort = sort
 
-    def update(self, new_data):
+    def update(self, new_data, sort=None):
+        if sort:
+            self.sort = sort
         # so as not to conflict with spdict
         value = max(max(self.locs) + 1, 0)
-        self._set_seq_locs(new_data, value)
+        self._set_seq_locs(new_data, value, self.sort)
 
-    def _set_seq_locs(self, data, value):
+    def _set_seq_locs(self, data, value, sort):
         # magic to make it work under np1.6
-        strdata = to_array(data)
+        strdata = to_str_array(data)
+
         # np.unique makes dateframes work
-        new_s = [d for d in np.unique(strdata) if d not in self.seq]
+        if sort:
+            unq = np.unique(strdata)
+        else:
+            _, idx = np.unique(strdata, return_index=~sort)
+            unq = strdata[np.sort(idx)]
+
+        new_s = [d for d in unq if d not in self.seq]
         for ns in new_s:
             self.seq.append(convert_to_string(ns))
             if ns in UnitData.spdict.keys():
diff --git a/lib/matplotlib/colorbar.py b/lib/matplotlib/colorbar.py
@@ -30,6 +30,7 @@
 
 import matplotlib as mpl
 import matplotlib.artist as martist
+import matplotlib.category as category
 import matplotlib.cbook as cbook
 import matplotlib.collections as collections
 import matplotlib.colors as colors
@@ -580,6 +581,8 @@ def _ticker(self):
                     locator = ticker.FixedLocator(b, nbins=10)
                 elif isinstance(self.norm, colors.LogNorm):
                     locator = ticker.LogLocator()
+                elif isinstance(self.norm, category.CategoryNorm):
+                    locator = ticker.FixedLocator(self.norm.nvals + 0.5)
                 else:
                     if mpl.rcParams['_internal.classic_mode']:
                         locator = ticker.MaxNLocator()
diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py
@@ -106,7 +106,7 @@ def test_axisinfo(self):
 
     def test_default_units(self):
         axis = FakeAxis(None)
-        assert self.cc.default_units(["a"], axis) is None
+        assert isinstance(self.cc.default_units(["a"], axis), cat.UnitData)
 
 
 class TestStrCategoryLocator(object):
@@ -129,17 +129,24 @@ def test_StrCategoryFormatterUnicode(self):
 
 
 class TestCategoryNorm(object):
-    testdata = [[[205, 302, 205, 101], [0, 2. / 3., 0, 1. / 3.]],
-                [[205, np.nan, 101, 305], [0, 9999, 1. / 3., 2. / 3.]],
-                [[205, 101, 504, 101], [0, 9999, 1. / 3., 1. / 3.]]]
+    testdata = [[[205, 302, 205, 101], [0, 2, 0, 1]],
+                [[205, np.nan, 101, 305], [0, np.nan, 1, 2]],
+                [[205, 101, 504, 101], [0, 1, np.nan, 1]]]
 
     ids = ["regular", "nan", "exclude"]
 
     @pytest.mark.parametrize("data, nmap", testdata, ids=ids)
     def test_norm(self, data, nmap):
         norm = cat.CategoryNorm([205, 101, 302])
-        test = np.ma.masked_equal(nmap, 9999)
-        np.testing.assert_allclose(norm(data), test)
+        np.testing.assert_array_equal(norm(data), nmap)
+
+
+def test_colors_from_categories():
+    codings = {205: "red", 101: "blue", 302: "green"}
+    cmap, norm = cat.colors_from_categories(codings)
+    assert cmap.colors == ['red', 'green', 'blue']
+    np.testing.assert_array_equal(norm.categories, ['205', '302', '101'])
+    assert cmap.N == norm.N
 
 
 def lt(tl):