From 36bcf76acf77419274d447c7a2365ee08394a525 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Sat, 7 Oct 2017 22:46:34 -0700 Subject: [PATCH 1/7] Rethink categoricals. Don't support mixed type inputs. Don't sort keys. --- lib/matplotlib/category.py | 130 ++++++++++---------------- lib/matplotlib/tests/test_category.py | 127 +++++++------------------ 2 files changed, 83 insertions(+), 174 deletions(-) diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index d2754d32fd3a..be2f2362412e 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -1,73 +1,40 @@ -# -*- coding: utf-8 OA-*-za -""" -catch all for categorical functions +"""Helpers for categorical data. """ + from __future__ import (absolute_import, division, print_function, unicode_literals) import six -import numpy as np - -import matplotlib.units as units -import matplotlib.ticker as ticker - -# np 1.6/1.7 support -from distutils.version import LooseVersion -import collections +from collections import OrderedDict +import itertools +import numpy as np -if LooseVersion(np.__version__) >= LooseVersion('1.8.0'): - def shim_array(data): - return np.array(data, dtype=np.unicode) -else: - def shim_array(data): - if (isinstance(data, six.string_types) or - not isinstance(data, collections.Iterable)): - data = [data] - try: - data = [str(d) for d in data] - except UnicodeEncodeError: - # this yields gibberish but unicode text doesn't - # render under numpy1.6 anyway - data = [d.encode('utf-8', 'ignore').decode('utf-8') - for d in data] - return np.array(data, dtype=np.unicode) +from matplotlib import cbook, ticker, units class StrCategoryConverter(units.ConversionInterface): @staticmethod def convert(value, unit, axis): - """Uses axis.unit_data map to encode - data as floats - """ - value = np.atleast_1d(value) - # try and update from here.... 
- if hasattr(axis.unit_data, 'update'): - for val in value: - if isinstance(val, six.string_types): - axis.unit_data.update(val) - vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs)) - - if isinstance(value, six.string_types): - return vmap[value] - - vals = shim_array(value) - - for lab, loc in vmap.items(): - vals[vals == lab] = loc - - return vals.astype('float') + """Uses axis.unit_data map to encode data as floats.""" + # We also need to pass numbers through. + if np.issubdtype(np.asarray(value).dtype.type, np.number): + return value + else: + axis.unit_data.update(value) + return np.vectorize(axis.unit_data._mapping.__getitem__)(value) @staticmethod def axisinfo(unit, axis): - majloc = StrCategoryLocator(axis.unit_data.locs) - majfmt = StrCategoryFormatter(axis.unit_data.seq) - return units.AxisInfo(majloc=majloc, majfmt=majfmt) + # Note that mapping may get mutated by later calls to plotting methods, + # so the locator and formatter must dynamically recompute locs and seq. + return units.AxisInfo( + majloc=StrCategoryLocator(axis.unit_data._mapping), + majfmt=StrCategoryFormatter(axis.unit_data._mapping)) @staticmethod def default_units(data, axis): - # the conversion call stack is: - # default_units->axis_info->convert + # the conversion call stack is default_units->axis_info->convert if axis.unit_data is None: axis.unit_data = UnitData(data) else: @@ -76,48 +43,51 @@ def default_units(data, axis): class StrCategoryLocator(ticker.FixedLocator): - def __init__(self, locs): - self.locs = locs + def __init__(self, mapping): + self._mapping = mapping self.nbins = None + @property + def locs(self): + return list(self._mapping.values()) + class StrCategoryFormatter(ticker.FixedFormatter): - def __init__(self, seq): - self.seq = seq - self.offset_string = '' + def __init__(self, mapping): + self._mapping = mapping + self.offset_string = "" + @property + def seq(self): + out = [] + for key in self._mapping: + # So that we support bytes input. 
+ out.append(key.decode("latin-1") if isinstance(key, bytes) + else key) + return out -class UnitData(object): - # debatable makes sense to special code missing values - spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0} +class UnitData(object): def __init__(self, data): - """Create mapping between unique categorical values - and numerical identifier + """Create mapping between unique categorical values and numerical id. Parameters ---------- data: iterable sequence of values """ - self.seq, self.locs = [], [] - self._set_seq_locs(data, 0) - - def update(self, new_data): - # so as not to conflict with spdict - value = max(max(self.locs) + 1, 0) - self._set_seq_locs(new_data, value) - - def _set_seq_locs(self, data, value): - strdata = shim_array(data) - new_s = [d for d in np.unique(strdata) if d not in self.seq] - for ns in new_s: - self.seq.append(ns) - if ns in UnitData.spdict: - self.locs.append(UnitData.spdict[ns]) - else: - self.locs.append(value) - value += 1 + self._mapping = {} + self._counter = itertools.count() + self.update(data) + + def update(self, data): + if isinstance(data, six.string_types): + data = [data] + sorted_unique = OrderedDict.fromkeys(data) + for s in sorted_unique: + if s in self._mapping: + continue + self._mapping[s] = next(self._counter) # Connects the convertor to matplotlib diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index 7156dc59933c..02fd9a1ba6fa 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -13,36 +13,25 @@ class TestUnitData(object): - testdata = [("hello world", ["hello world"], [0]), - ("Здравствуйте мир", ["Здравствуйте мир"], [0]), - (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf], - ['-inf', '3.14', 'A', 'B', 'inf', 'nan'], - [-3.0, 0, 1, 2, -2.0, -1.0])] + testdata = [("hello world", {"hello world": 0}), + ("Здравствуйте мир", {"Здравствуйте мир": 0})] + ids = ["single", "unicode"] - ids = ["single", "unicode", "mixed"] - - 
@pytest.mark.parametrize("data, seq, locs", testdata, ids=ids) - def test_unit(self, data, seq, locs): - act = cat.UnitData(data) - assert act.seq == seq - assert act.locs == locs + @pytest.mark.parametrize("data, mapping", testdata, ids=ids) + def test_unit(self, data, mapping): + assert cat.UnitData(data)._mapping == mapping def test_update_map(self): - data = ['a', 'd'] - oseq = ['a', 'd'] - olocs = [0, 1] - - data_update = ['b', 'd', 'e', np.inf] - useq = ['a', 'd', 'b', 'e', 'inf'] - ulocs = [0, 1, 2, 3, -2] + unitdata = cat.UnitData(['a', 'd']) + assert unitdata._mapping == {'a': 0, 'd': 1} + unitdata.update(['b', 'd', 'e']) + assert unitdata._mapping == {'a': 0, 'd': 1, 'b': 2, 'e': 3} - unitdata = cat.UnitData(data) - assert unitdata.seq == oseq - assert unitdata.locs == olocs - unitdata.update(data_update) - assert unitdata.seq == useq - assert unitdata.locs == ulocs +def _mock_unit_data(mapping): + ud = cat.UnitData([]) + ud._mapping.update(mapping) + return ud class FakeAxis(object): @@ -50,29 +39,18 @@ def __init__(self, unit_data): self.unit_data = unit_data -class MockUnitData(object): - def __init__(self, data): - seq, locs = zip(*data) - self.seq = list(seq) - self.locs = list(locs) - - class TestStrCategoryConverter(object): """Based on the pandas conversion and factorization tests: ref: /pandas/tseries/tests/test_converter.py /pandas/tests/test_algos.py:TestFactorize """ - testdata = [("Здравствуйте мир", [("Здравствуйте мир", 42)], 42), - ("hello world", [("hello world", 42)], 42), + testdata = [("Здравствуйте мир", {"Здравствуйте мир": 42}, 42), + ("hello world", {"hello world": 42}, 42), (['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], - [('a', 0), ('b', 1), ('c', 2)], - [0, 1, 1, 0, 0, 2, 2, 2]), - (['A', 'A', np.nan, 'B', -np.inf, 3.14, np.inf], - [('nan', -1), ('3.14', 0), ('A', 1), ('B', 2), - ('-inf', 100), ('inf', 200)], - [1, 1, -1, 2, 100, 0, 200])] - ids = ["unicode", "single", "basic", "mixed"] + {'a': 0, 'b': 1, 'c': 2}, + [0, 1, 1, 0, 0, 
2, 2, 2])] + ids = ["unicode", "single", "basic"] @pytest.fixture(autouse=True) def mock_axis(self, request): @@ -80,14 +58,12 @@ def mock_axis(self, request): @pytest.mark.parametrize("data, unitmap, exp", testdata, ids=ids) def test_convert(self, data, unitmap, exp): - MUD = MockUnitData(unitmap) - axis = FakeAxis(MUD) + axis = FakeAxis(_mock_unit_data(unitmap)) act = self.cc.convert(data, None, axis) np.testing.assert_array_equal(act, exp) def test_axisinfo(self): - MUD = MockUnitData([(None, None)]) - axis = FakeAxis(MUD) + axis = FakeAxis(_mock_unit_data({None: None})) ax = self.cc.axisinfo(None, axis) assert isinstance(ax.majloc, cat.StrCategoryLocator) assert isinstance(ax.majfmt, cat.StrCategoryFormatter) @@ -99,8 +75,8 @@ def test_default_units(self): class TestStrCategoryLocator(object): def test_StrCategoryLocator(self): - locs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - ticks = cat.StrCategoryLocator(locs) + locs = list(range(10)) + ticks = cat.StrCategoryLocator({str(x): x for x in locs}) np.testing.assert_array_equal(ticks.tick_values(None, None), locs) @@ -145,27 +121,16 @@ def data(self): self.d = ['a', 'b', 'c', 'a'] self.dticks = [0, 1, 2] self.dlabels = ['a', 'b', 'c'] - unitmap = [('a', 0), ('b', 1), ('c', 2)] - self.dunit_data = MockUnitData(unitmap) - - @pytest.fixture - def missing_data(self): - self.dm = ['here', np.nan, 'here', 'there'] - self.dmticks = [0, -1, 1] - self.dmlabels = ['here', 'nan', 'there'] - unitmap = [('here', 0), ('nan', -1), ('there', 1)] - self.dmunit_data = MockUnitData(unitmap) def axis_test(self, axis, ticks, labels, unit_data): np.testing.assert_array_equal(axis.get_majorticklocs(), ticks) assert lt(axis.get_majorticklabels()) == labels - np.testing.assert_array_equal(axis.unit_data.locs, unit_data.locs) - assert axis.unit_data.seq == unit_data.seq + assert axis.unit_data._mapping == unit_data._mapping def test_plot_unicode(self): words = ['Здравствуйте', 'привет'] locs = [0.0, 1.0] - unit_data = MockUnitData(zip(words, 
locs)) + unit_data = _mock_unit_data(dict(zip(words, locs))) fig, ax = plt.subplots() ax.plot(words) @@ -179,15 +144,8 @@ def test_plot_1d(self): ax.plot(self.d) fig.canvas.draw() - self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data) - - @pytest.mark.usefixtures("missing_data") - def test_plot_1d_missing(self): - fig, ax = plt.subplots() - ax.plot(self.dm) - fig.canvas.draw() - - self.axis_test(ax.yaxis, self.dmticks, self.dmlabels, self.dmunit_data) + unit_data = _mock_unit_data({'a': 0, 'b': 1, 'c': 2}) + self.axis_test(ax.yaxis, self.dticks, self.dlabels, unit_data) @pytest.mark.usefixtures("data") @pytest.mark.parametrize("bars", bytes_data, ids=bytes_ids) @@ -198,7 +156,8 @@ def test_plot_bytes(self, bars): ax.bar(bars, counts) fig.canvas.draw() - self.axis_test(ax.xaxis, self.dticks, self.dlabels, self.dunit_data) + unit_data = _mock_unit_data(dict(zip(bars, range(3)))) + self.axis_test(ax.xaxis, self.dticks, self.dlabels, unit_data) @pytest.mark.parametrize("bars", numlike_data, ids=numlike_ids) def test_plot_numlike(self, bars): @@ -208,27 +167,8 @@ def test_plot_numlike(self, bars): ax.bar(bars, counts) fig.canvas.draw() - unitmap = MockUnitData([('1', 0), ('11', 1), ('3', 2)]) - self.axis_test(ax.xaxis, [0, 1, 2], ['1', '11', '3'], unitmap) - - @pytest.mark.usefixtures("data", "missing_data") - def test_plot_2d(self): - fig, ax = plt.subplots() - ax.plot(self.dm, self.d) - fig.canvas.draw() - - self.axis_test(ax.xaxis, self.dmticks, self.dmlabels, self.dmunit_data) - self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data) - - @pytest.mark.usefixtures("data", "missing_data") - def test_scatter_2d(self): - - fig, ax = plt.subplots() - ax.scatter(self.dm, self.d) - fig.canvas.draw() - - self.axis_test(ax.xaxis, self.dmticks, self.dmlabels, self.dmunit_data) - self.axis_test(ax.yaxis, self.dticks, self.dlabels, self.dunit_data) + unit_data = _mock_unit_data(dict(zip(bars, range(3)))) + self.axis_test(ax.xaxis, [0, 1, 2], ['1', 
'11', '3'], unit_data) def test_plot_update(self): fig, ax = plt.subplots() @@ -240,8 +180,7 @@ def test_plot_update(self): labels = ['a', 'b', 'd', 'c'] ticks = [0, 1, 2, 3] - unit_data = MockUnitData(list(zip(labels, ticks))) - + unit_data = _mock_unit_data(dict(zip(labels, ticks))) self.axis_test(ax.yaxis, ticks, labels, unit_data) def test_scatter_update(self): @@ -254,5 +193,5 @@ def test_scatter_update(self): labels = ['a', 'b', 'd', 'c'] ticks = [0, 1, 2, 3] - unit_data = MockUnitData(list(zip(labels, ticks))) + unit_data = _mock_unit_data(dict(zip(labels, ticks))) self.axis_test(ax.xaxis, ticks, labels, unit_data) From e0b0a5291567c2a59dd8e0b502ae454ffafb854d Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Mon, 13 Nov 2017 23:17:16 -0800 Subject: [PATCH 2/7] Make private what can be; rewrite tests. --- lib/matplotlib/axis.py | 13 +- lib/matplotlib/category.py | 72 ++++---- lib/matplotlib/tests/test_category.py | 246 ++++++-------------------- 3 files changed, 89 insertions(+), 242 deletions(-) diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py index 948e3ae1386f..ef7beb39d3b7 100644 --- a/lib/matplotlib/axis.py +++ b/lib/matplotlib/axis.py @@ -668,7 +668,7 @@ def __init__(self, axes, pickradius=15): self.offsetText = self._get_offset_text() self.majorTicks = [] self.minorTicks = [] - self.unit_data = None + self._unit_data = None # Categorical mapping data. 
self.pickradius = pickradius # Initialize here for testing; later add API @@ -719,17 +719,6 @@ def _set_scale(self, value, **kwargs): def limit_range_for_scale(self, vmin, vmax): return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos()) - @property - def unit_data(self): - """Holds data that a ConversionInterface subclass uses - to convert between labels and indexes - """ - return self._unit_data - - @unit_data.setter - def unit_data(self, unit_data): - self._unit_data = unit_data - def get_children(self): children = [self.label, self.offsetText] majorticks = self.get_major_ticks() diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index be2f2362412e..566318ed4d2d 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -16,78 +16,70 @@ class StrCategoryConverter(units.ConversionInterface): @staticmethod def convert(value, unit, axis): - """Uses axis.unit_data map to encode data as floats.""" + """Encode data as floats.""" # We also need to pass numbers through. if np.issubdtype(np.asarray(value).dtype.type, np.number): return value else: - axis.unit_data.update(value) - return np.vectorize(axis.unit_data._mapping.__getitem__)(value) + axis._unit_data.update(value) + return np.vectorize(axis._unit_data._val_to_idx.__getitem__)(value) @staticmethod def axisinfo(unit, axis): # Note that mapping may get mutated by later calls to plotting methods, # so the locator and formatter must dynamically recompute locs and seq. 
return units.AxisInfo( - majloc=StrCategoryLocator(axis.unit_data._mapping), - majfmt=StrCategoryFormatter(axis.unit_data._mapping)) + majloc=StrCategoryLocator(axis._unit_data), + majfmt=StrCategoryFormatter(axis._unit_data)) @staticmethod def default_units(data, axis): # the conversion call stack is default_units->axis_info->convert - if axis.unit_data is None: - axis.unit_data = UnitData(data) - else: - axis.unit_data.update(data) + if axis._unit_data is None: + axis._unit_data = _UnitData() + axis._unit_data.update(data) return None -class StrCategoryLocator(ticker.FixedLocator): - def __init__(self, mapping): - self._mapping = mapping - self.nbins = None +class StrCategoryLocator(ticker.Locator): + def __init__(self, unit_data): + self._unit_data = unit_data - @property - def locs(self): - return list(self._mapping.values()) + def __call__(self): + return list(self._unit_data._val_to_idx.values()) -class StrCategoryFormatter(ticker.FixedFormatter): - def __init__(self, mapping): - self._mapping = mapping - self.offset_string = "" +class StrCategoryFormatter(ticker.Formatter): + def __init__(self, unit_data): + self._unit_data = unit_data - @property - def seq(self): - out = [] - for key in self._mapping: - # So that we support bytes input. - out.append(key.decode("latin-1") if isinstance(key, bytes) - else key) - return out + def __call__(self, x, pos=None): + if pos in range(len(self._unit_data._vals)): + s = self._unit_data._vals[pos] + if isinstance(s, bytes): + s = s.decode("latin-1") + return s + else: + return "" -class UnitData(object): - def __init__(self, data): +class _UnitData(object): + def __init__(self): """Create mapping between unique categorical values and numerical id. 
- - Parameters - ---------- - data: iterable - sequence of values """ - self._mapping = {} + self._vals = [] + self._val_to_idx = OrderedDict() self._counter = itertools.count() - self.update(data) def update(self, data): if isinstance(data, six.string_types): data = [data] sorted_unique = OrderedDict.fromkeys(data) - for s in sorted_unique: - if s in self._mapping: + for val in sorted_unique: + if val in self._val_to_idx: continue - self._mapping[s] = next(self._counter) + self._vals.append(val) + self._val_to_idx[val] = next(self._counter) # Connects the convertor to matplotlib diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index 02fd9a1ba6fa..ce4351dff948 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -5,193 +5,59 @@ import pytest import numpy as np - -import matplotlib.pyplot as plt -import matplotlib.category as cat - -import unittest - - -class TestUnitData(object): - testdata = [("hello world", {"hello world": 0}), - ("Здравствуйте мир", {"Здравствуйте мир": 0})] - ids = ["single", "unicode"] - - @pytest.mark.parametrize("data, mapping", testdata, ids=ids) - def test_unit(self, data, mapping): - assert cat.UnitData(data)._mapping == mapping - - def test_update_map(self): - unitdata = cat.UnitData(['a', 'd']) - assert unitdata._mapping == {'a': 0, 'd': 1} - unitdata.update(['b', 'd', 'e']) - assert unitdata._mapping == {'a': 0, 'd': 1, 'b': 2, 'e': 3} - - -def _mock_unit_data(mapping): - ud = cat.UnitData([]) - ud._mapping.update(mapping) - return ud - - -class FakeAxis(object): - def __init__(self, unit_data): - self.unit_data = unit_data - - -class TestStrCategoryConverter(object): - """Based on the pandas conversion and factorization tests: - - ref: /pandas/tseries/tests/test_converter.py - /pandas/tests/test_algos.py:TestFactorize - """ - testdata = [("Здравствуйте мир", {"Здравствуйте мир": 42}, 42), - ("hello world", {"hello world": 42}, 42), - (['a', 'b', 
'b', 'a', 'a', 'c', 'c', 'c'], - {'a': 0, 'b': 1, 'c': 2}, - [0, 1, 1, 0, 0, 2, 2, 2])] - ids = ["unicode", "single", "basic"] - - @pytest.fixture(autouse=True) - def mock_axis(self, request): - self.cc = cat.StrCategoryConverter() - - @pytest.mark.parametrize("data, unitmap, exp", testdata, ids=ids) - def test_convert(self, data, unitmap, exp): - axis = FakeAxis(_mock_unit_data(unitmap)) - act = self.cc.convert(data, None, axis) - np.testing.assert_array_equal(act, exp) - - def test_axisinfo(self): - axis = FakeAxis(_mock_unit_data({None: None})) - ax = self.cc.axisinfo(None, axis) - assert isinstance(ax.majloc, cat.StrCategoryLocator) - assert isinstance(ax.majfmt, cat.StrCategoryFormatter) - - def test_default_units(self): - axis = FakeAxis(None) - assert self.cc.default_units(["a"], axis) is None - - -class TestStrCategoryLocator(object): - def test_StrCategoryLocator(self): - locs = list(range(10)) - ticks = cat.StrCategoryLocator({str(x): x for x in locs}) - np.testing.assert_array_equal(ticks.tick_values(None, None), locs) - - -class TestStrCategoryFormatter(unittest.TestCase): - def test_StrCategoryFormatter(self): - seq = ["hello", "world", "hi"] - labels = cat.StrCategoryFormatter(seq) - assert labels('a', 1) == "world" - - def test_StrCategoryFormatterUnicode(self): - seq = ["Здравствуйте", "привет"] - labels = cat.StrCategoryFormatter(seq) - assert labels('a', 1) == "привет" - - -def lt(tl): - return [l.get_text() for l in tl] - - -class TestPlot(object): - bytes_data = [ - ['a', 'b', 'c'], - [b'a', b'b', b'c'], - np.array([b'a', b'b', b'c']) - ] - - bytes_ids = ['string list', 'bytes list', 'bytes ndarray'] - - numlike_data = [ - ['1', '11', '3'], - np.array(['1', '11', '3']), - [b'1', b'11', b'3'], - np.array([b'1', b'11', b'3']), - ] - - numlike_ids = [ - 'string list', 'string ndarray', 'bytes list', 'bytes ndarray' - ] - - @pytest.fixture - def data(self): - self.d = ['a', 'b', 'c', 'a'] - self.dticks = [0, 1, 2] - self.dlabels = ['a', 'b', 'c'] - 
- def axis_test(self, axis, ticks, labels, unit_data): - np.testing.assert_array_equal(axis.get_majorticklocs(), ticks) - assert lt(axis.get_majorticklabels()) == labels - assert axis.unit_data._mapping == unit_data._mapping - - def test_plot_unicode(self): - words = ['Здравствуйте', 'привет'] - locs = [0.0, 1.0] - unit_data = _mock_unit_data(dict(zip(words, locs))) - - fig, ax = plt.subplots() - ax.plot(words) - fig.canvas.draw() - - self.axis_test(ax.yaxis, locs, words, unit_data) - - @pytest.mark.usefixtures("data") - def test_plot_1d(self): - fig, ax = plt.subplots() - ax.plot(self.d) - fig.canvas.draw() - - unit_data = _mock_unit_data({'a': 0, 'b': 1, 'c': 2}) - self.axis_test(ax.yaxis, self.dticks, self.dlabels, unit_data) - - @pytest.mark.usefixtures("data") - @pytest.mark.parametrize("bars", bytes_data, ids=bytes_ids) - def test_plot_bytes(self, bars): - counts = np.array([4, 6, 5]) - - fig, ax = plt.subplots() - ax.bar(bars, counts) - fig.canvas.draw() - - unit_data = _mock_unit_data(dict(zip(bars, range(3)))) - self.axis_test(ax.xaxis, self.dticks, self.dlabels, unit_data) - - @pytest.mark.parametrize("bars", numlike_data, ids=numlike_ids) - def test_plot_numlike(self, bars): - counts = np.array([4, 6, 5]) - - fig, ax = plt.subplots() - ax.bar(bars, counts) - fig.canvas.draw() - - unit_data = _mock_unit_data(dict(zip(bars, range(3)))) - self.axis_test(ax.xaxis, [0, 1, 2], ['1', '11', '3'], unit_data) - - def test_plot_update(self): - fig, ax = plt.subplots() - - ax.plot(['a', 'b']) - ax.plot(['a', 'b', 'd']) - ax.plot(['b', 'c', 'd']) - fig.canvas.draw() - - labels = ['a', 'b', 'd', 'c'] - ticks = [0, 1, 2, 3] - unit_data = _mock_unit_data(dict(zip(labels, ticks))) - self.axis_test(ax.yaxis, ticks, labels, unit_data) - - def test_scatter_update(self): - fig, ax = plt.subplots() - - ax.scatter(['a', 'b'], [0., 3.]) - ax.scatter(['a', 'b', 'd'], [1., 2., 3.]) - ax.scatter(['b', 'c', 'd'], [4., 1., 2.]) - fig.canvas.draw() - - labels = ['a', 'b', 'd', 'c'] - 
ticks = [0, 1, 2, 3] - unit_data = _mock_unit_data(dict(zip(labels, ticks))) - self.axis_test(ax.xaxis, ticks, labels, unit_data) +from numpy.testing import assert_array_equal + +from matplotlib import category as cat, pyplot as plt +from matplotlib.axes import Axes + + +@pytest.fixture +def ax(): + return plt.figure().subplots() + + +@pytest.mark.parametrize( + "data, expected_indices, expected_labels", + [("hello world", [0], ["hello world"]), + (["Здравствуйте мир"], [0], ["Здравствуйте мир"]), + (["a", "b", "b", "a", "c", "c"], [0, 1, 1, 0, 2, 2], ["a", "b", "c"]), + ([b"foo", b"bar"], range(2), ["foo", "bar"]), + (np.array(["1", "11", "3"]), range(3), ["1", "11", "3"]), + (np.array([b"1", b"11", b"3"]), range(3), ["1", "11", "3"])]) +def test_simple(ax, data, expected_indices, expected_labels): + l, = ax.plot(data) + assert_array_equal(l.get_ydata(orig=False), expected_indices) + assert isinstance(ax.yaxis.major.locator, cat.StrCategoryLocator) + assert isinstance(ax.yaxis.major.formatter, cat.StrCategoryFormatter) + ax.figure.canvas.draw() + labels = [label.get_text() for label in ax.yaxis.get_majorticklabels()] + assert labels == expected_labels + + +def test_default_units(ax): + ax.plot(["a"]) + assert ax.yaxis.converter.default_units(["a"], ax.yaxis) is None + + +def test_update(ax): + l1, = ax.plot(["a", "d"]) + l2, = ax.plot(["b", "d", "e"]) + assert_array_equal(l1.get_ydata(orig=False), [0, 1]) + assert_array_equal(l2.get_ydata(orig=False), [2, 1, 3]) + assert ax.yaxis._unit_data._vals == ["a", "d", "b", "e"] + assert ax.yaxis._unit_data._val_to_idx == {"a": 0, "d": 1, "b": 2, "e": 3} + + +@pytest.mark.parametrize("plotter", [Axes.plot, Axes.scatter, Axes.bar]) +def test_StrCategoryLocator(ax, plotter): + ax.plot(["a", "b", "c"]) + assert_array_equal(ax.yaxis.major.locator(), range(3)) + + +@pytest.mark.parametrize("plotter", [Axes.plot, Axes.scatter, Axes.bar]) +def test_StrCategoryFormatter(ax, plotter): + plotter(ax, range(2), ["hello", "мир"]) + 
assert ax.yaxis.major.formatter(object(), 0) == "hello" + assert ax.yaxis.major.formatter(object(), 1) == "мир" + assert ax.yaxis.major.formatter(object(), 2) == "" + assert ax.yaxis.major.formatter(object(), None) == "" From 6589adc6676044f7671808db83c711b50f90a316 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 14 Nov 2017 11:19:57 -0800 Subject: [PATCH 3/7] Actually we don't need to store the unit data ourselves. --- lib/matplotlib/axis.py | 1 - lib/matplotlib/category.py | 16 ++++++---------- lib/matplotlib/tests/test_category.py | 7 ++++--- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py index ef7beb39d3b7..b0ddc5beadbc 100644 --- a/lib/matplotlib/axis.py +++ b/lib/matplotlib/axis.py @@ -668,7 +668,6 @@ def __init__(self, axes, pickradius=15): self.offsetText = self._get_offset_text() self.majorTicks = [] self.minorTicks = [] - self._unit_data = None # Categorical mapping data. self.pickradius = pickradius # Initialize here for testing; later add API diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index 566318ed4d2d..564d00eb5f92 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -21,24 +21,20 @@ def convert(value, unit, axis): if np.issubdtype(np.asarray(value).dtype.type, np.number): return value else: - axis._unit_data.update(value) - return np.vectorize(axis._unit_data._val_to_idx.__getitem__)(value) + unit.update(value) + return np.vectorize(unit._val_to_idx.__getitem__)(value) @staticmethod def axisinfo(unit, axis): # Note that mapping may get mutated by later calls to plotting methods, # so the locator and formatter must dynamically recompute locs and seq. 
return units.AxisInfo( - majloc=StrCategoryLocator(axis._unit_data), - majfmt=StrCategoryFormatter(axis._unit_data)) + majloc=StrCategoryLocator(unit), + majfmt=StrCategoryFormatter(unit)) @staticmethod def default_units(data, axis): - # the conversion call stack is default_units->axis_info->convert - if axis._unit_data is None: - axis._unit_data = _UnitData() - axis._unit_data.update(data) - return None + return _CategoricalUnit() class StrCategoryLocator(ticker.Locator): @@ -63,7 +59,7 @@ def __call__(self, x, pos=None): return "" -class _UnitData(object): +class _CategoricalUnit(object): def __init__(self): """Create mapping between unique categorical values and numerical id. """ diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index ce4351dff948..e8d87682002f 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -36,7 +36,8 @@ def test_simple(ax, data, expected_indices, expected_labels): def test_default_units(ax): ax.plot(["a"]) - assert ax.yaxis.converter.default_units(["a"], ax.yaxis) is None + du = ax.yaxis.converter.default_units(["a"], ax.yaxis) + assert isinstance(du, cat._CategoricalUnit) def test_update(ax): @@ -44,8 +45,8 @@ def test_update(ax): l2, = ax.plot(["b", "d", "e"]) assert_array_equal(l1.get_ydata(orig=False), [0, 1]) assert_array_equal(l2.get_ydata(orig=False), [2, 1, 3]) - assert ax.yaxis._unit_data._vals == ["a", "d", "b", "e"] - assert ax.yaxis._unit_data._val_to_idx == {"a": 0, "d": 1, "b": 2, "e": 3} + assert ax.yaxis.units._vals == ["a", "d", "b", "e"] + assert ax.yaxis.units._val_to_idx == {"a": 0, "d": 1, "b": 2, "e": 3} @pytest.mark.parametrize("plotter", [Axes.plot, Axes.scatter, Axes.bar]) From 5dfe4d14e045f2ac221c8d0468b8aade87185495 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 14 Nov 2017 13:23:22 -0800 Subject: [PATCH 4/7] Force unit conversion before call to asarray(). Note that this disables post-hoc unit changes on Line2D. 
--- lib/matplotlib/axes/_base.py | 31 +++++++++---------------------- lib/matplotlib/category.py | 2 ++ lib/matplotlib/cbook/__init__.py | 3 +-- 3 files changed, 12 insertions(+), 24 deletions(-) diff --git a/lib/matplotlib/axes/_base.py b/lib/matplotlib/axes/_base.py index dce69ff8e27e..13ff1dc4d0ea 100644 --- a/lib/matplotlib/axes/_base.py +++ b/lib/matplotlib/axes/_base.py @@ -216,24 +216,10 @@ def _xy_from_xy(self, x, y): if self.axes.xaxis is not None and self.axes.yaxis is not None: bx = self.axes.xaxis.update_units(x) by = self.axes.yaxis.update_units(y) - - if self.command != 'plot': - # the Line2D class can handle unitized data, with - # support for post hoc unit changes etc. Other mpl - # artists, e.g., Polygon which _process_plot_var_args - # also serves on calls to fill, cannot. So this is a - # hack to say: if you are not "plot", which is - # creating Line2D, then convert the data now to - # floats. If you are plot, pass the raw data through - # to Line2D which will handle the conversion. So - # polygons will not support post hoc conversions of - # the unit type since they are not storing the orig - # data. 
Hopefully we can rationalize this at a later - # date - JDH - if bx: - x = self.axes.convert_xunits(x) - if by: - y = self.axes.convert_yunits(y) + if bx: + x = self.axes.convert_xunits(x) + if by: + y = self.axes.convert_yunits(y) # like asanyarray, but converts scalar to array, and doesn't change # existing compatible sequences @@ -376,11 +362,12 @@ def _plot_args(self, tup, kwargs): if 'label' not in kwargs or kwargs['label'] is None: kwargs['label'] = get_label(tup[-1], None) - if len(tup) == 2: - x = _check_1d(tup[0]) - y = _check_1d(tup[-1]) + if len(tup) == 1: + x, y = index_of(tup[0]) + elif len(tup) == 2: + x, y = tup else: - x, y = index_of(tup[-1]) + assert False x, y = self._xy_from_xy(x, y) diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index 564d00eb5f92..4d4b95f43f41 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -74,6 +74,8 @@ def update(self, data): for val in sorted_unique: if val in self._val_to_idx: continue + if not isinstance(val, (six.text_type, six.binary_type)): + raise TypeError("Not a string") self._vals.append(val) self._val_to_idx[val] = next(self._counter) diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index 0b6a4968b113..0ad420ac0097 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -2295,8 +2295,7 @@ def index_of(y): try: return y.index.values, y.values except AttributeError: - y = _check_1d(y) - return np.arange(y.shape[0], dtype=float), y + return np.arange(len(y)), y def safe_first_element(obj): From beef2d339627c01a0076f8eed548c5c94dc2a214 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 14 Nov 2017 14:04:31 -0800 Subject: [PATCH 5/7] Reattach the unitful data to Line2D. 
--- lib/matplotlib/axes/_base.py | 42 +++++++++++++++++++++------ lib/matplotlib/cbook/__init__.py | 2 +- lib/matplotlib/tests/test_category.py | 3 +- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/lib/matplotlib/axes/_base.py b/lib/matplotlib/axes/_base.py index 13ff1dc4d0ea..170f421f874a 100644 --- a/lib/matplotlib/axes/_base.py +++ b/lib/matplotlib/axes/_base.py @@ -369,20 +369,44 @@ def _plot_args(self, tup, kwargs): else: assert False - x, y = self._xy_from_xy(x, y) - - if self.command == 'plot': - func = self._makeline + deunitized_x, deunitized_y = self._xy_from_xy(x, y) + # The previous call has registered the converters, if any, on the axes. + # This check will need to be replaced by a comparison with the + # DefaultConverter when that PR goes in. + if self.axes.xaxis.converter is None or self.command is not "plot": + xt, yt = deunitized_x.T, deunitized_y.T else: - kw['closed'] = kwargs.get('closed', True) - func = self._makefill - - ncx, ncy = x.shape[1], y.shape[1] + # np.asarray would destroy unit information so we need to construct + # the 1D arrays to pass to Line2D.set_xdata manually... (but this + # is only relevant if the command is "plot"). + + def to_list_of_lists(data): + ndim = np.ndim(data) + if ndim == 0: + return [[data]] + elif ndim == 1: + return [data] + elif ndim == 2: + return zip(*data) # Transpose it. + + xt, yt = map(to_list_of_lists, [x, y]) + + ncx, ncy = deunitized_x.shape[1], deunitized_y.shape[1] if ncx > 1 and ncy > 1 and ncx != ncy: cbook.warn_deprecated("2.2", "cycling among columns of inputs " "with non-matching shapes is deprecated.") for j in xrange(max(ncx, ncy)): - seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs) + if self.command == "plot": + seg = self._makeline([], [], kw, kwargs) + # This ensures that the line remembers both the unitized and + # deunitized data. 
+ seg.set_xdata(xt[j % ncx]) + seg.set_ydata(yt[j % ncy]) + else: + kw['closed'] = kwargs.get('closed', True) + seg = self._makefill(deunitized_x[:, j % ncx], + deunitized_y[:, j % ncy], + kw, kwargs) ret.append(seg) return ret diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index 0ad420ac0097..11c414da059d 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -2295,7 +2295,7 @@ def index_of(y): try: return y.index.values, y.values except AttributeError: - return np.arange(len(y)), y + return np.arange((np.shape(y) or (1,))[0]), y def safe_first_element(obj): diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index e8d87682002f..39cc35549eb4 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -18,8 +18,7 @@ def ax(): @pytest.mark.parametrize( "data, expected_indices, expected_labels", - [("hello world", [0], ["hello world"]), - (["Здравствуйте мир"], [0], ["Здравствуйте мир"]), + [(["Здравствуйте мир"], [0], ["Здравствуйте мир"]), (["a", "b", "b", "a", "c", "c"], [0, 1, 1, 0, 2, 2], ["a", "b", "c"]), ([b"foo", b"bar"], range(2), ["foo", "bar"]), (np.array(["1", "11", "3"]), range(3), ["1", "11", "3"]), From 07cb765b6b9a6cd04d3b76cf96c70547c11e7fff Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 14 Nov 2017 16:49:38 -0800 Subject: [PATCH 6/7] Fixes based on phone discussion. 
--- lib/matplotlib/axes/_base.py | 6 +----- lib/matplotlib/axis.py | 10 ++++++++++ lib/matplotlib/category.py | 17 +++++++++-------- lib/matplotlib/cbook/__init__.py | 3 ++- lib/matplotlib/tests/test_category.py | 2 +- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/lib/matplotlib/axes/_base.py b/lib/matplotlib/axes/_base.py index 170f421f874a..4824f4faa955 100644 --- a/lib/matplotlib/axes/_base.py +++ b/lib/matplotlib/axes/_base.py @@ -397,11 +397,7 @@ def to_list_of_lists(data): "with non-matching shapes is deprecated.") for j in xrange(max(ncx, ncy)): if self.command == "plot": - seg = self._makeline([], [], kw, kwargs) - # This ensures that the line remembers both the unitized and - # deunitized data. - seg.set_xdata(xt[j % ncx]) - seg.set_ydata(yt[j % ncy]) + seg = self._makeline(xt[j % ncx], yt[j % ncy], kw, kwargs) else: kw['closed'] = kwargs.get('closed', True) seg = self._makefill(deunitized_x[:, j % ncx], diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py index b0ddc5beadbc..744dcbf65d08 100644 --- a/lib/matplotlib/axis.py +++ b/lib/matplotlib/axis.py @@ -718,6 +718,16 @@ def _set_scale(self, value, **kwargs): def limit_range_for_scale(self, vmin, vmax): return self._scale.limit_range_for_scale(vmin, vmax, self.get_minpos()) + @property + @cbook.deprecated("2.1.1") + def unit_data(self): + return self.units + + @unit_data.setter + @cbook.deprecated("2.1.1") + def unit_data(self, value): + self.set_units(value) + def get_children(self): children = [self.label, self.offsetText] majorticks = self.get_major_ticks() diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index 4d4b95f43f41..67ad9d7956b1 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -34,7 +34,7 @@ def axisinfo(unit, axis): @staticmethod def default_units(data, axis): - return _CategoricalUnit() + return UnitData() class StrCategoryLocator(ticker.Locator): @@ -53,24 +53,25 @@ def __call__(self, x, pos=None): if pos in
range(len(self._unit_data._vals)): s = self._unit_data._vals[pos] if isinstance(s, bytes): - s = s.decode("latin-1") + s = s.decode("utf-8") return s else: return "" -class _CategoricalUnit(object): - def __init__(self): +class UnitData(object): + def __init__(self, data=()): """Create mapping between unique categorical values and numerical id. """ self._vals = [] self._val_to_idx = OrderedDict() self._counter = itertools.count() + self.update(data) - def update(self, data): - if isinstance(data, six.string_types): - data = [data] - sorted_unique = OrderedDict.fromkeys(data) + def update(self, new_data): + if isinstance(new_data, six.string_types): + new_data = [new_data] + sorted_unique = OrderedDict.fromkeys(new_data) for val in sorted_unique: if val in self._val_to_idx: continue diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index 11c414da059d..28bbeb965f03 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -2295,7 +2295,8 @@ def index_of(y): try: return y.index.values, y.values except AttributeError: - return np.arange((np.shape(y) or (1,))[0]), y + # Ensure that scalar y gives x == [0]. + return np.arange((np.shape(y) or (1,))[0], dtype=float), y def safe_first_element(obj): diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py index 39cc35549eb4..4f990b182e98 100644 --- a/lib/matplotlib/tests/test_category.py +++ b/lib/matplotlib/tests/test_category.py @@ -36,7 +36,7 @@ def test_simple(ax, data, expected_indices, expected_labels): def test_default_units(ax): ax.plot(["a"]) du = ax.yaxis.converter.default_units(["a"], ax.yaxis) - assert isinstance(du, cat._CategoricalUnit) + assert isinstance(du, cat.UnitData) def test_update(ax): From 59887f51b3884a85e36e477ecd09394c206e05ae Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 14 Nov 2017 20:24:04 -0800 Subject: [PATCH 7/7] Deprecate some more stuff we don't need. 
--- lib/matplotlib/category.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py index 67ad9d7956b1..171a2a6ef65f 100644 --- a/lib/matplotlib/category.py +++ b/lib/matplotlib/category.py @@ -66,6 +66,10 @@ def __init__(self, data=()): self._vals = [] self._val_to_idx = OrderedDict() self._counter = itertools.count() + if np.size(data): + cbook.warn_deprecated( + "2.1.1", + "Passing data to the UnitData constructor is deprecated.") self.update(data) def update(self, new_data):