From a16e025f8aaa3a9dec9b9b05af2e2ab2eb4ea2d3 Mon Sep 17 00:00:00 2001 From: hannah Date: Tue, 2 Aug 2016 23:44:21 -0400 Subject: [PATCH 1/5] support for py.test, updating axis ticks, and unitData as class --- lib/matplotlib/axis.py | 8 +- lib/matplotlib/category.py | 118 ++++++----- lib/matplotlib/tests/test_category.py | 278 ++++++++++++-------------- 3 files changed, 183 insertions(+), 221 deletions(-) diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py index 276c09605a70..cbccdb695489 100644 --- a/lib/matplotlib/axis.py +++ b/lib/matplotlib/axis.py @@ -642,7 +642,7 @@ def __init__(self, axes, pickradius=15): self.offsetText = self._get_offset_text() self.majorTicks = [] self.minorTicks = [] - self.unit_data = [] + self.unit_data = None self.pickradius = pickradius # Initialize here for testing; later add API @@ -695,14 +695,14 @@ def limit_range_for_scale(self, vmin, vmax): @property def unit_data(self): - """Holds data that a ConversionInterface subclass relys on + """Holds data that a ConversionInterface subclass uses to convert between labels and indexes """ return self._unit_data @unit_data.setter - def unit_data(self, data): - self._unit_data = data + def unit_data(self, unit_data): + self._unit_data = unit_data def get_children(self): children = [self.label, self.offsetText] diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index bfac242149c3..9c6d4809959f 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -9,6 +9,7 @@ import numpy as np +import matplotlib.cbook as cbook import matplotlib.units as units import matplotlib.ticker as ticker @@ -22,10 +23,12 @@ def to_array(data, maxlen=100): if NP_NEW: return np.array(data, dtype=np.unicode) + if cbook.is_scalar_or_string(data): + data = [data] try: vals = np.array(data, dtype=('|S', maxlen)) except UnicodeEncodeError: - # pure hack + # this yields gibberish vals = np.array([convert_to_string(d) for d in data]) return vals @@ -36,40 +39,44 @@ def convert(value, unit, axis): """Uses axis.unit_data map to encode data as floats """ - vmap = dict(axis.unit_data) + vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs)) if isinstance(value, six.string_types): return vmap[value] vals = to_array(value) - for lab, loc in axis.unit_data: + for lab, loc in vmap.items(): vals[vals == lab] = loc return vals.astype('float') @staticmethod def axisinfo(unit, axis): - seq, locs = zip(*axis.unit_data) - majloc = StrCategoryLocator(locs) - majfmt = StrCategoryFormatter(seq) + majloc = StrCategoryLocator(axis.unit_data.locs) + majfmt = StrCategoryFormatter(axis.unit_data.seq) return units.AxisInfo(majloc=majloc, majfmt=majfmt) @staticmethod def default_units(data, axis): # the conversion call stack is: # default_units->axis_info->convert - axis.unit_data = map_categories(data, axis.unit_data) + if axis.unit_data is None: + axis.unit_data = UnitData(data) + else: + axis.unit_data.update(data) return None class StrCategoryLocator(ticker.FixedLocator): def __init__(self, locs): - super(StrCategoryLocator, self).__init__(locs, None) + self.locs = locs + self.nbins = None class StrCategoryFormatter(ticker.FixedFormatter): def __init__(self, seq): - super(StrCategoryFormatter, self).__init__(seq) + self.seq = seq + self.offset_string = '' def convert_to_string(value): @@ -77,8 +84,8 @@ def convert_to_string(value): np.array(...,dtype=unicode) for all later versions of numpy""" if isinstance(value, six.string_types): - return value - if np.isfinite(value): + pass + elif np.isfinite(value): value = np.asarray(value, dtype=str)[np.newaxis][0] elif np.isnan(value): value = 'nan' @@ -91,58 +98,45 @@ def convert_to_string(value): return value -def map_categories(data, old_map=None): - """Create mapping between unique categorical - values and numerical identifier. - - Paramters - --------- - data: iterable - sequence of values - old_map: list of tuple, optional - if not `None`, than old_mapping will be updated with new values and - previous mappings will remain unchanged) - sort: bool, optional - sort keys by ASCII value - - Returns - ------- - list of tuple - [(label, ticklocation),...] - - """ - - # code typical missing data in the negative range because - # everything else will always have positive encoding - # question able if it even makes sense +class UnitData(object): + # debatable makes sense to special code missing values spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0} - - if isinstance(data, six.string_types): - data = [data] - - # will update this post cbook/dict support - strdata = to_array(data) - uniq = np.unique(strdata) - - if old_map: - olabs, okeys = zip(*old_map) - svalue = max(okeys) + 1 - else: - old_map, olabs, okeys = [], [], [] - svalue = 0 - - category_map = old_map[:] - - new_labs = [u for u in uniq if u not in olabs] - missing = [nl for nl in new_labs if nl in spdict.keys()] - - category_map.extend([(m, spdict[m]) for m in missing]) - - new_labs = [nl for nl in new_labs if nl not in missing] - - new_locs = np.arange(svalue, svalue + len(new_labs), dtype='float') - category_map.extend(list(zip(new_labs, new_locs))) - return category_map + # used to set out of bounds + LOWER_BOUND = -4 + + def __init__(self, data): + """Create mapping between unique categorical values + and numerical identifier + Paramters + --------- + data: iterable + sequence of values + """ + self.seq, self.locs = [], [] + self._set_seq(data) + self._set_locs(0) + + def update(self, new_data): + self._set_seq(new_data) + value = max(self.locs) + self._set_locs(value + 1) + + def _set_seq(self, data): + #magic to make it work under np1.6 + strdata = to_array(data) + # np.unique makes dateframes work + for d in np.unique(strdata): + if d not in self.seq: + self.seq.append(convert_to_string(d)) + self.locs.append(UnitData.LOWER_BOUND) + + def _set_locs(self, value): + for i, s in enumerate(self.seq): + if s in UnitData.spdict.keys(): + self.locs[i] = UnitData.spdict[s] + elif self.locs[i] == UnitData.LOWER_BOUND: + self.locs[i] = value + value += 1 # Connects the convertor to matplotlib diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index 02db774e4ff6..aa6afff11f41 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -2,254 +2,222 @@ """Catch all for categorical functions""" from __future__ import (absolute_import, division, print_function, unicode_literals) -import unittest +import pytest import numpy as np import matplotlib.pyplot as plt from matplotlib.testing.decorators import cleanup import matplotlib.category as cat +import unittest -class TestConvertToString(unittest.TestCase): - def setUp(self): - pass - - def test_string(self): - self.assertEqual("abc", cat.convert_to_string("abc")) - def test_unicode(self): - self.assertEqual("Здравствуйте мир", - cat.convert_to_string("Здравствуйте мир")) +class TestConvertToString(object): + testdata = [("abc", "abc"), ("Здравствуйте мир", "Здравствуйте мир"), + ("3.14", 3.14), ("nan", np.nan), + ("inf", np.inf), ("-inf", -np.inf)] + ids = ["string", "unicode", "decimal", "nan", "posinf", "neginf", ] - def test_decimal(self): - self.assertEqual("3.14", cat.convert_to_string(3.14)) + @pytest.mark.parametrize("expected, test", testdata, ids=ids) + def test_convert_to_string(self, expected, test): + assert expected == cat.convert_to_string(test) - def test_nan(self): - self.assertEqual("nan", cat.convert_to_string(np.nan)) - def test_posinf(self): - self.assertEqual("inf", cat.convert_to_string(np.inf)) +class TestUnitData(object): + testdata = [("hello world", ["hello world"], [0]), + ("Здравствуйте мир", ["Здравствуйте мир"], [0]), + (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf], + ['-inf', '3.14', 'A', 'B', 'inf', 'nan'], + [-3.0, 0, 1, 2, -2.0, -1.0])] - def test_neginf(self): - self.assertEqual("-inf", cat.convert_to_string(-np.inf)) + ids = ["single", "unicode", "mixed"] + @pytest.mark.parametrize("data, seq, locs", testdata, ids=ids) + def test_unit(self, data, seq, locs): + act = cat.UnitData(data) + assert act.seq == seq + assert act.locs == locs -class TestMapCategories(unittest.TestCase): - def test_map_unicode(self): - act = cat.map_categories("Здравствуйте мир") - exp = [("Здравствуйте мир", 0)] - self.assertListEqual(act, exp) + def test_update_map(self): + data = ['a', 'd'] + oseq = ['a', 'd'] + olocs = [0, 1] - def test_map_data(self): - act = cat.map_categories("hello world") - exp = [("hello world", 0)] - self.assertListEqual(act, exp) + data_update = ['b', 'd', 'e', np.inf] + useq = ['a', 'd', 'b', 'e', 'inf'] + ulocs = [0, 1, 2, 3, -2] - def test_map_data_basic(self): - data = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'] - exp = [('a', 0), ('b', 1), ('c', 2)] - act = cat.map_categories(data) - self.assertListEqual(sorted(act), sorted(exp)) + unitdata = cat.UnitData(data) + assert unitdata.seq == oseq + assert unitdata.locs == olocs - def test_map_data_mixed(self): - data = ['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf] - exp = [('nan', -1), ('3.14', 0), - ('A', 1), ('B', 2), ('-inf', -3), ('inf', -2)] + unitdata.update(data_update) + assert unitdata.seq == useq + assert unitdata.locs == ulocs - act = cat.map_categories(data) - self.assertListEqual(sorted(act), sorted(exp)) - @unittest.SkipTest - def test_update_map(self): - data = ['b', 'd', 'e', np.inf] - old_map = [('a', 0), ('d', 1)] - exp = [('inf', -2), ('a', 0), ('d', 1), - ('b', 2), ('e', 3)] - act = cat.map_categories(data, old_map) - self.assertListEqual(sorted(act), sorted(exp)) +class FakeAxis(object): + def __init__(self, unit_data): + self.unit_data = unit_data -class FakeAxis(object): - def __init__(self): - self.unit_data = [] +class MockUnitData(object): + def __init__(self, data): + seq, locs = zip(*data) + self.seq = list(seq) + self.locs = list(locs) -class TestStrCategoryConverter(unittest.TestCase): +class TestStrCategoryConverter(object): """Based on the pandas conversion and factorization tests: ref: /pandas/tseries/tests/test_converter.py /pandas/tests/test_algos.py:TestFactorize """ - - def setUp(self): + testdata = [("Здравствуйте мир", [("Здравствуйте мир", 42)], 42), + ("hello world", [("hello world", 42)], 42), + (['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], + [('a', 0), ('b', 1), ('c', 2)], + [0, 1, 1, 0, 0, 2, 2, 2]), + (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf], + [('nan', -1), ('3.14', 0), ('A', 1), ('B', 2), + ('-inf', 100), ('inf', 200)], + [1, 1, -1, 2, 100, 0, 200])] + ids = ["unicode", "single", "basic", "mixed"] + + @pytest.fixture(autouse=True) + def mock_axis(self, request): self.cc = cat.StrCategoryConverter() - self.axis = FakeAxis() - - def test_convert_unicode(self): - self.axis.unit_data = [("Здравствуйте мир", 42)] - act = self.cc.convert("Здравствуйте мир", None, self.axis) - exp = 42 - self.assertEqual(act, exp) - - def test_convert_single(self): - self.axis.unit_data = [("hello world", 42)] - act = self.cc.convert("hello world", None, self.axis) - exp = 42 - self.assertEqual(act, exp) - - def test_convert_basic(self): - data = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'] - exp = [0, 1, 1, 0, 0, 2, 2, 2] - self.axis.unit_data = [('a', 0), ('b', 1), ('c', 2)] - act = self.cc.convert(data, None, self.axis) - np.testing.assert_array_equal(act, exp) - def test_convert_mixed(self): - data = ['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf] - exp = [1, 1, -1, 2, 100, 0, 200] - self.axis.unit_data = [('nan', -1), ('3.14', 0), - ('A', 1), ('B', 2), - ('-inf', 100), ('inf', 200)] - act = self.cc.convert(data, None, self.axis) + @pytest.mark.parametrize("data, unitmap, exp", testdata, ids=ids) + def test_convert(self, data, unitmap, exp): + MUD = MockUnitData(unitmap) + axis = FakeAxis(MUD) + act = self.cc.convert(data, None, axis) np.testing.assert_array_equal(act, exp) def test_axisinfo(self): - self.axis.unit_data = [('a', 0)] - ax = self.cc.axisinfo(None, self.axis) - self.assertTrue(isinstance(ax.majloc, cat.StrCategoryLocator)) - self.assertTrue(isinstance(ax.majfmt, cat.StrCategoryFormatter)) + MUD = MockUnitData([(None, None)]) + axis = FakeAxis(MUD) + ax = self.cc.axisinfo(None, axis) + assert isinstance(ax.majloc, cat.StrCategoryLocator) + assert isinstance(ax.majfmt, cat.StrCategoryFormatter) def test_default_units(self): - self.assertEqual(self.cc.default_units(["a"], self.axis), None) - + axis = FakeAxis(None) + assert self.cc.default_units(["a"], axis) is None -class TestStrCategoryLocator(unittest.TestCase): - def setUp(self): - self.locs = list(range(10)) +class TestStrCategoryLocator(object): def test_StrCategoryLocator(self): - ticks = cat.StrCategoryLocator(self.locs) - np.testing.assert_equal(ticks.tick_values(None, None), - self.locs) + locs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ticks = cat.StrCategoryLocator(locs) + np.testing.assert_array_equal(ticks.tick_values(None, None), locs) class TestStrCategoryFormatter(unittest.TestCase): - def setUp(self): - self.seq = ["hello", "world", "hi"] - def test_StrCategoryFormatter(self): - labels = cat.StrCategoryFormatter(self.seq) - self.assertEqual(labels('a', 1), "world") + seq = ["hello", "world", "hi"] + labels = cat.StrCategoryFormatter(seq) + assert labels('a', 1) == "world" + + def test_StrCategoryFormatterUnicode(self): + seq = ["Здравствуйте", "привет"] + labels = cat.StrCategoryFormatter(seq) + assert labels('a', 1) == "привет" def lt(tl): return [l.get_text() for l in tl] -class TestPlot(unittest.TestCase): - - def setUp(self): +class TestPlot(object): + @pytest.fixture + def data(self): self.d = ['a', 'b', 'c', 'a'] self.dticks = [0, 1, 2] self.dlabels = ['a', 'b', 'c'] - self.dunit_data = [('a', 0), ('b', 1), ('c', 2)] + unitmap = [('a', 0), ('b', 1), ('c', 2)] + self.dunit_data = MockUnitData(unitmap) + @pytest.fixture + def missing_data(self): self.dm = ['here', np.nan, 'here', 'there'] - self.dmticks = [-1, 0, 1] - self.dmlabels = ['nan', 'here', 'there'] - self.dmunit_data = [('nan', -1), ('here', 0), ('there', 1)] + self.dmticks = [0, -1, 1] + self.dmlabels = ['here', 'nan', 'there'] + unitmap = [('here', 0), ('nan', -1), ('there', 1)] + self.dmunit_data = MockUnitData(unitmap) + + def axis_test(self, axis, ticks, labels, unit_data): + np.testing.assert_array_equal(axis.get_majorticklocs(), ticks) + assert lt(axis.get_majorticklabels()) == labels + np.testing.assert_array_equal(axis.unit_data.locs, unit_data.locs) + assert axis.unit_data.seq == unit_data.seq @cleanup def test_plot_unicode(self): - # needs image test - works but - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + # Image test would fail on numpy 1.6 words = ['Здравствуйте', 'привет'] locs = [0.0, 1.0] + unit_data = MockUnitData(zip(words, locs)) + + fig, ax = plt.subplots() ax.plot(words) fig.canvas.draw() - self.assertListEqual(ax.yaxis.unit_data, - list(zip(words, locs))) - np.testing.assert_array_equal(ax.get_yticks(), locs) - self.assertListEqual(lt(ax.get_yticklabels()), words) + self.axis_test(ax.yaxis, locs, words, unit_data) @cleanup + @pytest.mark.usefixtures("data") def test_plot_1d(self): - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + fig, ax = plt.subplots() ax.plot(self.d) fig.canvas.draw() - np.testing.assert_array_equal(ax.get_yticks(), self.dticks) - self.assertListEqual(lt(ax.get_yticklabels()), - self.dlabels) - self.assertListEqual(ax.yaxis.unit_data, self.dunit_data) + self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data) @cleanup + @pytest.mark.usefixtures("missing_data") def test_plot_1d_missing(self): - - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + fig, ax = plt.subplots() ax.plot(self.dm) fig.canvas.draw() - np.testing.assert_array_equal(ax.get_yticks(), self.dmticks) - self.assertListEqual(lt(ax.get_yticklabels()), - self.dmlabels) - self.assertListEqual(ax.yaxis.unit_data, self.dmunit_data) + self.axis_test(ax.yaxis, self.dmticks, self.dmlabels, self.dmunit_data) @cleanup + @pytest.mark.usefixtures("data", "missing_data") def test_plot_2d(self): - - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + fig, ax = plt.subplots() ax.plot(self.dm, self.d) fig.canvas.draw() - np.testing.assert_array_equal(ax.get_xticks(), self.dmticks) - self.assertListEqual(lt(ax.get_xticklabels()), - self.dmlabels) - self.assertListEqual(ax.xaxis.unit_data, self.dmunit_data) - - np.testing.assert_array_equal(ax.get_yticks(), self.dticks) - self.assertListEqual(lt(ax.get_yticklabels()), - self.dlabels) - self.assertListEqual(ax.yaxis.unit_data, self.dunit_data) + self.axis_test(ax.xaxis, self.dmticks, self.dmlabels, self.dmunit_data) + self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data) @cleanup + @pytest.mark.usefixtures("data", "missing_data") def test_scatter_2d(self): - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + fig, ax = plt.subplots() ax.scatter(self.dm, self.d) fig.canvas.draw() - np.testing.assert_array_equal(ax.get_xticks(), self.dmticks) - self.assertListEqual(lt(ax.get_xticklabels()), - self.dmlabels) - self.assertListEqual(ax.xaxis.unit_data, self.dmunit_data) + self.axis_test(ax.xaxis, self.dmticks, self.dmlabels, self.dmunit_data) + self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data) - np.testing.assert_array_equal(ax.get_yticks(), self.dticks) - self.assertListEqual(lt(ax.get_yticklabels()), - self.dlabels) - self.assertListEqual(ax.yaxis.unit_data, self.dunit_data) - - @unittest.SkipTest @cleanup def test_plot_update(self): - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + fig, ax = plt.subplots() ax.plot(['a', 'b']) ax.plot(['a', 'b', 'd']) ax.plot(['b', 'c', 'd']) fig.canvas.draw() - labels_new = ['a', 'b', 'd', 'c'] - ticks_new = [0, 1, 2, 3] - self.assertListEqual(ax.yaxis.unit_data, - list(zip(labels_new, ticks_new))) - np.testing.assert_array_equal(ax.get_yticks(), ticks_new) - self.assertListEqual(lt(ax.get_yticklabels()), labels_new) + labels = ['a', 'b', 'd', 'c'] + ticks = [0, 1, 2, 3] + unit_data = MockUnitData(list(zip(labels, ticks))) + + self.axis_test(ax.yaxis, ticks, labels, unit_data) From 1075157ab81583d8cc6577886ae6e27471b3a26c Mon Sep 17 00:00:00 2001 From: hannah Date: Wed, 3 Aug 2016 10:17:23 -0400 Subject: [PATCH 2/5] add pytest to travis, appveyor, and tox and py3.5 to tox --- .travis.yml | 2 +- appveyor.yml | 2 +- tox.ini | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6a69c3052713..b6277de0be76 100644 --- a/.travis.yml +++ b/.travis.yml @@ -110,7 +110,7 @@ install: # Install nose from a build which has partial # support for python36 and suport for coverage output suppressing pip install git+https://github.com/jenshnielsen/nose.git@matplotlibnose - + pip install pytest # We manually install humor sans using the package from Ubuntu 14.10. Unfortunatly humor sans is not # availible in the Ubuntu version used by Travis but we can manually install the deb from a later # version since is it basically just a .ttf file diff --git a/appveyor.yml b/appveyor.yml index 58d343641fb5..78752beedf87 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -82,7 +82,7 @@ install: # same things as the requirements in ci/conda_recipe/meta.yaml # if conda-forge gets a new pyqt, it might be nice to install it as well to have more backends # https://github.com/conda-forge/conda-forge.github.io/issues/157#issuecomment-223536381 - - cmd: conda create -q -n test-environment python=%PYTHON_VERSION% pip setuptools numpy python-dateutil freetype=2.6 msinttypes "tk=8.5" pyparsing pytz tornado "libpng>=1.6.21,<1.7" "zlib=1.2" "cycler>=0.10" nose mock + - cmd: conda create -q -n test-environment python=%PYTHON_VERSION% pip setuptools numpy python-dateutil freetype=2.6 msinttypes "tk=8.5" pyparsing pytz tornado "libpng>=1.6.21,<1.7" "zlib=1.2" "cycler>=0.10" nose mock pytest - activate test-environment - cmd: echo %PYTHON_VERSION% %TARGET_ARCH% - cmd: IF %PYTHON_VERSION% == 2.7 conda install -q functools32 diff --git a/tox.ini b/tox.ini index 296cefb56281..f1d3bc8669ca 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py26, py27, py31, py32 +envlist = py26, py27, py31, py32, py35 [testenv] changedir = /tmp @@ -15,3 +15,4 @@ deps = nose mock numpy + pytest \ No newline at end of file From c6eec14320db7f8b8170a68d515a2e7596caaf97 Mon Sep 17 00:00:00 2001 From: hannah Date: Wed, 3 Aug 2016 10:47:39 -0400 Subject: [PATCH 3/5] refactored set_seq and set_locs into 1 --- lib/matplotlib/category.py | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index 9c6d4809959f..0f9009bfafc0 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -101,8 +101,6 @@ def convert_to_string(value): class UnitData(object): # debatable makes sense to special code missing values spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0} - # used to set out of bounds - LOWER_BOUND = -4 def __init__(self, data): """Create mapping between unique categorical values @@ -113,32 +111,26 @@ def __init__(self, data): sequence of values """ self.seq, self.locs = [], [] - self._set_seq(data) - self._set_locs(0) + self._set_seq_locs(data, 0) def update(self, new_data): - self._set_seq(new_data) - value = max(self.locs) - self._set_locs(value + 1) + # so as not to conflict with spdict + value = max(max(self.locs) + 1, 0) + self._set_seq_locs(new_data, value) - def _set_seq(self, data): - #magic to make it work under np1.6 + def _set_seq_locs(self, data, value): + # magic to make it work under np1.6 strdata = to_array(data) # np.unique makes dateframes work - for d in np.unique(strdata): - if d not in self.seq: - self.seq.append(convert_to_string(d)) - self.locs.append(UnitData.LOWER_BOUND) - - def _set_locs(self, value): - for i, s in enumerate(self.seq): - if s in UnitData.spdict.keys(): - self.locs[i] = UnitData.spdict[s] - elif self.locs[i] == UnitData.LOWER_BOUND: - self.locs[i] = value + new_s = [d for d in np.unique(strdata) if d not in self.seq] + for ns in new_s: + self.seq.append(convert_to_string(ns)) + if ns in UnitData.spdict.keys(): + self.locs.append(UnitData.spdict[ns]) + else: + self.locs.append(value) value += 1 - # Connects the convertor to matplotlib units.registry[str] = StrCategoryConverter() units.registry[bytes] = StrCategoryConverter() From 624991c1475111bf760ab2cb9b6e3da99e30b8dd Mon Sep 17 00:00:00 2001 From: hannah Date: Wed, 17 Aug 2016 02:41:09 -0400 Subject: [PATCH 4/5] CategoryNorm --- lib/matplotlib/category.py | 113 ++++++++++++++++++++++---- lib/matplotlib/tests/test_category.py | 14 ++++ 2 files changed, 113 insertions(+), 14 deletions(-) diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index 0f9009bfafc0..a2b8b5e02590 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -9,10 +9,10 @@ import numpy as np +import matplotlib.colors as mcolors import matplotlib.cbook as cbook -import matplotlib.units as units -import matplotlib.ticker as ticker - +import matplotlib.units as munits +import matplotlib.ticker as mticker # pure hack for numpy 1.6 support from distutils.version import LooseVersion @@ -33,11 +33,22 @@ def to_array(data, maxlen=100): return vals -class StrCategoryConverter(units.ConversionInterface): +class StrCategoryConverter(munits.ConversionInterface): + """Converts categorical (or string) data to numerical values + + Conversion typically happens in the following order: + 1. default_units: + creates unit_data category-integer mapping and binds to axis + 2. axis_info: + sets ticks/locator and label/formatter + 3. convert: + maps input category data to integers using unit_data + + """ @staticmethod def convert(value, unit, axis): - """Uses axis.unit_data map to encode - data as floats + """ + Encode value as floats using axis.unit_data """ vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs)) @@ -52,14 +63,22 @@ def convert(value, unit, axis): @staticmethod def axisinfo(unit, axis): + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is None + *axis.unit_data* is used to set ticks and labels + """ majloc = StrCategoryLocator(axis.unit_data.locs) majfmt = StrCategoryFormatter(axis.unit_data.seq) - return units.AxisInfo(majloc=majloc, majfmt=majfmt) + return munits.AxisInfo(majloc=majloc, majfmt=majfmt) @staticmethod def default_units(data, axis): - # the conversion call stack is: - # default_units->axis_info->convert + """ + Create mapping between string categories in *data* + and integers, then store in *axis.unit_data* + """ if axis.unit_data is None: axis.unit_data = UnitData(data) else: @@ -67,18 +86,84 @@ def default_units(data, axis): return None -class StrCategoryLocator(ticker.FixedLocator): +class StrCategoryLocator(mticker.FixedLocator): + """ + Ensures that every category has a tick by subclassing + :class:`~matplotlib.ticker.FixedLocator` + """ def __init__(self, locs): self.locs = locs self.nbins = None -class StrCategoryFormatter(ticker.FixedFormatter): +class StrCategoryFormatter(mticker.FixedFormatter): + """ + Labels every category by subclassing + :class:`~matplotlib.ticker.FixedFormatter` + """ def __init__(self, seq): self.seq = seq self.offset_string = '' +class CategoryNorm(mcolors.Normalize): + """ + Preserves ordering of discrete values + """ + def __init__(self, categories): + """ + *categories* + distinct values for mapping + + Out-of-range values are mapped to a value not in categories; + these are then converted to valid indices by :meth:`Colormap.__call__`. + """ + self.categories = categories + self.N = len(self.categories) + self.vmin = 0 + self.vmax = self.N + self._interp = False + + def __call__(self, value, clip=None): + if not cbook.iterable(value): + value = [value] + + value = np.asarray(value) + ret = np.ones(value.shape) * np.nan + + for i, c in enumerate(self.categories): + ret[value == c] = i / (self.N * 1.0) + + return np.ma.array(ret, mask=np.isnan(ret)) + + def inverse(self, value): + # not quite sure what invertible means in this context + return ValueError("CategoryNorm is not invertible") + + +def colors_from_categories(codings): + """ + Helper routine to generate a cmap and a norm from a list + of (color, value) pairs + + Parameters + ---------- + codings : sequence of (key, value) pairs + + Returns + ------- + (cmap, norm) : tuple containing a :class:`Colormap` and a \ + :class:`Normalize` instance + """ + if isinstance(codings, dict): + codings = codings.items() + + values, colors = zip(*codings) + cmap = mcolors.ListedColormap(list(colors)) + norm = CategoryNorm(list(values)) + return cmap, norm + + def convert_to_string(value): """Helper function for numpy 1.6, can be replaced with np.array(...,dtype=unicode) for all later versions of numpy""" @@ -132,6 +217,6 @@ def _set_seq_locs(self, data, value): value += 1 # Connects the convertor to matplotlib -units.registry[str] = StrCategoryConverter() -units.registry[bytes] = StrCategoryConverter() -units.registry[six.text_type] = StrCategoryConverter() +munits.registry[str] = StrCategoryConverter() +munits.registry[bytes] = StrCategoryConverter() +munits.registry[six.text_type] = StrCategoryConverter() diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index aa6afff11f41..1f84367d4665 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -128,6 +128,20 @@ def test_StrCategoryFormatterUnicode(self): assert labels('a', 1) == "привет" +class TestCategoryNorm(object): + testdata = [[[205, 302, 205, 101], [0, 2. / 3., 0, 1. / 3.]], + [[205, np.nan, 101, 305], [0, 9999, 1. / 3., 2. / 3.]], + [[205, 101, 504, 101], [0, 9999, 1. / 3., 1. / 3.]]] + + ids = ["regular", "nan", "exclude"] + + @pytest.mark.parametrize("data, nmap", testdata, ids=ids) + def test_norm(self, data, nmap): + norm = cat.CategoryNorm([205, 101, 302]) + test = np.ma.masked_equal(nmap, 9999) + np.testing.assert_allclose(norm(data), test) + + def lt(tl): return [l.get_text() for l in tl] From 9707a0f8b6c7b17da8c087b122bd0b5f22e7d1d7 Mon Sep 17 00:00:00 2001 From: hannah Date: Wed, 17 Aug 2016 05:25:04 -0400 Subject: [PATCH 5/5] Categorical mapping via units on norm --- build_alllocal.cmd | 36 ---------- lib/matplotlib/category.py | 100 +++++++++++++++----------- lib/matplotlib/colorbar.py | 5 ++ lib/matplotlib/tests/test_category.py | 30 ++++++-- 4 files changed, 87 insertions(+), 84 deletions(-) delete mode 100644 build_alllocal.cmd diff --git a/build_alllocal.cmd b/build_alllocal.cmd deleted file mode 100644 index 9eb9ceadbc68..000000000000 --- a/build_alllocal.cmd +++ /dev/null @@ -1,36 +0,0 @@ -:: This assumes you have installed all the dependencies via conda packages: -:: # create a new environment with the required packages -:: conda create -n "matplotlib_build" python=3.4 numpy python-dateutil pyparsing pytz tornado "cycler>=0.10" tk libpng zlib freetype -:: activate matplotlib_build -:: if you want qt backend, you also have to install pyqt -:: conda install pyqt -:: # this package is only available in the conda-forge channel -:: conda install -c conda-forge msinttypes -:: if you build on py2.7: -:: conda install -c conda-forge functools32 - -set TARGET=bdist_wheel -IF [%1]==[] ( - echo Using default target: %TARGET% -) else ( - set TARGET=%1 - echo Using user supplied target: %TARGET% -) - -IF NOT DEFINED CONDA_PREFIX ( - echo No Conda env activated: you need to create a conda env with the right packages and activate it! - GOTO:eof -) - -:: copy the libs which have "wrong" names -set LIBRARY_LIB=%CONDA_PREFIX%\Library\lib -mkdir lib || cmd /c "exit /b 0" -copy %LIBRARY_LIB%\zlibstatic.lib lib\z.lib -copy %LIBRARY_LIB%\libpng_static.lib lib\png.lib - -:: Make the header files and the rest of the static libs available during the build -:: CONDA_PREFIX is a env variable which is set to the currently active environment path -set MPLBASEDIRLIST=%CONDA_PREFIX%\Library\;. - -:: build the target -python setup.py %TARGET% diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index a2b8b5e02590..4abdf4b86a4b 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -38,12 +38,11 @@ class StrCategoryConverter(munits.ConversionInterface): Conversion typically happens in the following order: 1. default_units: - creates unit_data category-integer mapping and binds to axis + create unit_data category-integer mapping and binds to axis 2. axis_info: - sets ticks/locator and label/formatter + set ticks/locator and labels/formatter 3. convert: - maps input category data to integers using unit_data - + map input category data to integers using unit_data """ @staticmethod def convert(value, unit, axis): @@ -53,13 +52,13 @@ def convert(value, unit, axis): vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs)) if isinstance(value, six.string_types): - return vmap[value] + return vmap.get(value, None) vals = to_array(value) for lab, loc in vmap.items(): vals[vals == lab] = loc - return vals.astype('float') + return vals.astype('float64') @staticmethod def axisinfo(unit, axis): @@ -74,16 +73,19 @@ def axisinfo(unit, axis): return munits.AxisInfo(majloc=majloc, majfmt=majfmt) @staticmethod - def default_units(data, axis): + def default_units(data, axis, sort=True, normed=False): """ Create mapping between string categories in *data* - and integers, then store in *axis.unit_data* + and integers, and store in *axis.unit_data* """ - if axis.unit_data is None: - axis.unit_data = UnitData(data) - else: - axis.unit_data.update(data) - return None + if axis and axis.unit_data: + axis.unit_data.update(data, sort) + return axis.unit_data + + unit_data = UnitData(data, sort) + if axis: + axis.unit_data = unit_data + return unit_data class StrCategoryLocator(mticker.FixedLocator): @@ -110,35 +112,37 @@ class CategoryNorm(mcolors.Normalize): """ Preserves ordering of discrete values """ - def __init__(self, categories): + def __init__(self, data): """ *categories* distinct values for mapping - Out-of-range values are mapped to a value not in categories; - these are then converted to valid indices by :meth:`Colormap.__call__`. + Out-of-range values are mapped to np.nan """ - self.categories = categories - self.N = len(self.categories) - self.vmin = 0 - self.vmax = self.N - self._interp = False - - def __call__(self, value, clip=None): - if not cbook.iterable(value): - value = [value] - - value = np.asarray(value) - ret = np.ones(value.shape) * np.nan - for i, c in enumerate(self.categories): - ret[value == c] = i / (self.N * 1.0) + self.units = StrCategoryConverter() + self.unit_data = None + self.units.default_units(data, + self, sort=False) + self.loc2seq = dict(zip(self.unit_data.locs, self.unit_data.seq)) + self.vmin = min(self.unit_data.locs) + self.vmax = max(self.unit_data.locs) - return np.ma.array(ret, mask=np.isnan(ret)) + def __call__(self, value, clip=None): + # gonna have to go into imshow and undo casting + value = np.asarray(value, dtype=np.int) + ret = self.units.convert(value, None, self) + # knock out values not in the norm + mask = np.in1d(ret, self.unit_data.locs).reshape(ret.shape) + # normalize ret & locs + ret /= self.vmax + return np.ma.array(ret, mask=~mask) def inverse(self, value): - # not quite sure what invertible means in this context - return ValueError("CategoryNorm is not invertible") + if not cbook.iterable(value): + value = np.asarray(value) + vscaled = np.asarray(value) * self.vmax + return [self.loc2seq[int(vs)] for vs in vscaled] def colors_from_categories(codings): @@ -156,8 +160,7 @@ def colors_from_categories(codings): :class:`Normalize` instance """ if isinstance(codings, dict): - codings = codings.items() - + codings = cbook.sanitize_sequence(codings.items()) values, colors = zip(*codings) cmap = mcolors.ListedColormap(list(colors)) norm = CategoryNorm(list(values)) @@ -184,30 +187,43 @@ def convert_to_string(value): class UnitData(object): - # debatable makes sense to special code missing values + # debatable if it makes sense to special code missing values spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0} - def __init__(self, data): + def __init__(self, data, sort=True): """Create mapping between unique categorical values and numerical identifier Paramters --------- data: iterable sequence of values + sort: bool + sort input data, default is True + False preserves input order """ self.seq, self.locs = [], [] - self._set_seq_locs(data, 0) + self._set_seq_locs(data, 0, sort) + self.sort = sort - def update(self, new_data): + def update(self, new_data, sort=True): + if sort: + self.sort = sort # so as not to conflict with spdict value = max(max(self.locs) + 1, 0) - self._set_seq_locs(new_data, value) + self._set_seq_locs(new_data, value, self.sort) - def _set_seq_locs(self, data, value): + def _set_seq_locs(self, data, value, sort): # magic to make it work under np1.6 strdata = to_array(data) + # np.unique makes dateframes work - new_s = [d for d in np.unique(strdata) if d not in self.seq] + if sort: + unq = np.unique(strdata) + else: + _, idx = np.unique(strdata, return_index=~sort) + unq = strdata[np.sort(idx)] + + new_s = [d for d in unq if d not in self.seq] for ns in new_s: self.seq.append(convert_to_string(ns)) if ns in UnitData.spdict.keys(): diff --git a/lib/matplotlib/colorbar.py b/lib/matplotlib/colorbar.py index 67cdae563d52..dff73fbc519a 100644 --- a/lib/matplotlib/colorbar.py +++ b/lib/matplotlib/colorbar.py @@ -30,6 +30,7 @@ import matplotlib as mpl import matplotlib.artist as martist +import matplotlib.category as category import matplotlib.cbook as cbook import matplotlib.collections as collections import matplotlib.colors as colors @@ -312,6 +313,8 @@ def __init__(self, ax, cmap=None, if format is None: if isinstance(self.norm, colors.LogNorm): self.formatter = ticker.LogFormatterMathtext() + elif isinstance(self.norm, category.CategoryNorm): + self.formatter = ticker.FixedFormatter(self.norm.unit_data.seq) else: self.formatter = ticker.ScalarFormatter() elif cbook.is_string_like(format): @@ -580,6 +583,8 @@ def _ticker(self): locator = ticker.FixedLocator(b, nbins=10) elif isinstance(self.norm, colors.LogNorm): locator = ticker.LogLocator() + elif isinstance(self.norm, category.CategoryNorm): + locator = ticker.FixedLocator(self.norm.unit_data.locs) else: if mpl.rcParams['_internal.classic_mode']: locator = ticker.MaxNLocator() diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index 1f84367d4665..cdf8af1f51a1 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -106,7 +106,7 @@ def test_axisinfo(self): def test_default_units(self): axis = FakeAxis(None) - assert self.cc.default_units(["a"], axis) is None + assert isinstance(self.cc.default_units(["a"], axis), cat.UnitData) class TestStrCategoryLocator(object): @@ -129,17 +129,35 @@ def test_StrCategoryFormatterUnicode(self): class TestCategoryNorm(object): - testdata = [[[205, 302, 205, 101], [0, 2. / 3., 0, 1. / 3.]], - [[205, np.nan, 101, 305], [0, 9999, 1. / 3., 2. / 3.]], - [[205, 101, 504, 101], [0, 9999, 1. / 3., 1. / 3.]]] + testdata = [[[205, 302, 205, 101], [0, 1, 0, .5]], + [[205, np.nan, 101, 305], [0, np.nan, .5, 1]], + [[205, 101, 504, 101], [0, .5, np.nan, .5]]] ids = ["regular", "nan", "exclude"] @pytest.mark.parametrize("data, nmap", testdata, ids=ids) def test_norm(self, data, nmap): norm = cat.CategoryNorm([205, 101, 302]) - test = np.ma.masked_equal(nmap, 9999) - np.testing.assert_allclose(norm(data), test) + masked_nmap = np.ma.masked_equal(nmap, np.nan) + assert np.ma.allequal(norm(data), masked_nmap) + + def test_invert(self): + data = [205, 302, 101] + strdata = ['205', '302', '101'] + value = [0, .5, 1] + norm = cat.CategoryNorm(data) + assert norm.inverse(value) == strdata + + +class TestColorsFromCategories(object): + testdata = [[{'101': "blue", '205': "red", '302': "green"}, dict], + [[('205', "red"), ('101', "blue"), ('302', "green")], list]] + ids = ["dict", "tuple"] + + @pytest.mark.parametrize("codings, mtype", testdata, ids=ids) + def test_colors_from_categories(self, codings, mtype): + cmap, norm = cat.colors_from_categories(codings) + assert mtype(zip(norm.unit_data.seq, cmap.colors)) == codings def lt(tl):