Thanks to visit codestin.com
Credit goes to github.com

Skip to content

[WIP] Categorical Color Mapping #6934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ install:
# Install nose from a build which has partial
# support for python36 and support for suppressing coverage output
pip install git+https://github.com/jenshnielsen/nose.git@matplotlibnose

pip install pytest
# We manually install humor sans using the package from Ubuntu 14.10. Unfortunately humor sans is not
# available in the Ubuntu version used by Travis but we can manually install the deb from a later
# version since it is basically just a .ttf file
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ install:
# same things as the requirements in ci/conda_recipe/meta.yaml
# if conda-forge gets a new pyqt, it might be nice to install it as well to have more backends
# https://github.com/conda-forge/conda-forge.github.io/issues/157#issuecomment-223536381
- cmd: conda create -q -n test-environment python=%PYTHON_VERSION% pip setuptools numpy python-dateutil freetype=2.6 msinttypes "tk=8.5" pyparsing pytz tornado "libpng>=1.6.21,<1.7" "zlib=1.2" "cycler>=0.10" nose mock
- cmd: conda create -q -n test-environment python=%PYTHON_VERSION% pip setuptools numpy python-dateutil freetype=2.6 msinttypes "tk=8.5" pyparsing pytz tornado "libpng>=1.6.21,<1.7" "zlib=1.2" "cycler>=0.10" nose mock pytest
- activate test-environment
- cmd: echo %PYTHON_VERSION% %TARGET_ARCH%
- cmd: IF %PYTHON_VERSION% == 2.7 conda install -q functools32
Expand Down
36 changes: 0 additions & 36 deletions build_alllocal.cmd

This file was deleted.

8 changes: 4 additions & 4 deletions lib/matplotlib/axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ def __init__(self, axes, pickradius=15):
self.offsetText = self._get_offset_text()
self.majorTicks = []
self.minorTicks = []
self.unit_data = []
self.unit_data = None
self.pickradius = pickradius

# Initialize here for testing; later add API
Expand Down Expand Up @@ -695,14 +695,14 @@ def limit_range_for_scale(self, vmin, vmax):

@property
def unit_data(self):
"""Holds data that a ConversionInterface subclass relys on
"""Holds data that a ConversionInterface subclass uses
to convert between labels and indexes
"""
return self._unit_data

@unit_data.setter
def unit_data(self, data):
self._unit_data = data
def unit_data(self, unit_data):
self._unit_data = unit_data

def get_children(self):
children = [self.label, self.offsetText]
Expand Down
247 changes: 167 additions & 80 deletions lib/matplotlib/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@

import numpy as np

import matplotlib.units as units
import matplotlib.ticker as ticker

import matplotlib.colors as mcolors
import matplotlib.cbook as cbook
import matplotlib.units as munits
import matplotlib.ticker as mticker

# pure hack for numpy 1.6 support
from distutils.version import LooseVersion
Expand All @@ -22,63 +23,157 @@
def to_array(data, maxlen=100):
    """Coerce *data* into a numpy array of strings.

    When ``NP_NEW`` is set the data is converted directly to a unicode
    array.  Otherwise (the numpy 1.6 compatibility path) a scalar or string
    is first wrapped in a list, then the data is converted to a byte-string
    array of width *maxlen*; if that raises ``UnicodeEncodeError`` each
    element is converted individually with ``convert_to_string``.

    Parameters
    ----------
    data : scalar, string or iterable
        values to convert
    maxlen : int, optional
        string width used by the legacy (non ``NP_NEW``) conversion

    Returns
    -------
    numpy.ndarray
    """
    if not NP_NEW:
        if cbook.is_scalar_or_string(data):
            data = [data]
        try:
            return np.array(data, dtype=('|S', maxlen))
        except UnicodeEncodeError:
            # pure hack
            # this yields gibberish
            return np.array([convert_to_string(d) for d in data])
    return np.array(data, dtype=np.unicode)


class StrCategoryConverter(units.ConversionInterface):
class StrCategoryConverter(munits.ConversionInterface):
"""Converts categorical (or string) data to numerical values

Conversion typically happens in the following order:
1. default_units:
create unit_data category-integer mapping and binds to axis
2. axis_info:
set ticks/locator and labels/formatter
3. convert:
map input category data to integers using unit_data
"""
@staticmethod
def convert(value, unit, axis):
"""Uses axis.unit_data map to encode
data as floats
"""
vmap = dict(axis.unit_data)
Encode value as floats using axis.unit_data
"""
vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs))

if isinstance(value, six.string_types):
return vmap[value]
return vmap.get(value, None)

vals = to_array(value)
for lab, loc in axis.unit_data:
for lab, loc in vmap.items():
vals[vals == lab] = loc

return vals.astype('float')
return vals.astype('float64')

@staticmethod
def axisinfo(unit, axis):
seq, locs = zip(*axis.unit_data)
majloc = StrCategoryLocator(locs)
majfmt = StrCategoryFormatter(seq)
return units.AxisInfo(majloc=majloc, majfmt=majfmt)
"""
Return the :class:`~matplotlib.units.AxisInfo` for *unit*.

*unit* is None
*axis.unit_data* is used to set ticks and labels
"""
majloc = StrCategoryLocator(axis.unit_data.locs)
majfmt = StrCategoryFormatter(axis.unit_data.seq)
return munits.AxisInfo(majloc=majloc, majfmt=majfmt)

@staticmethod
def default_units(data, axis):
# the conversion call stack is:
# default_units->axis_info->convert
axis.unit_data = map_categories(data, axis.unit_data)
return None
def default_units(data, axis, sort=True, normed=False):
"""
Create mapping between string categories in *data*
and integers, and store in *axis.unit_data*
"""
if axis and axis.unit_data:
axis.unit_data.update(data, sort)
return axis.unit_data

unit_data = UnitData(data, sort)
if axis:
axis.unit_data = unit_data
return unit_data


class StrCategoryLocator(ticker.FixedLocator):
class StrCategoryLocator(mticker.FixedLocator):
"""
Ensures that every category has a tick by subclassing
:class:`~matplotlib.ticker.FixedLocator`
"""
def __init__(self, locs):
super(StrCategoryLocator, self).__init__(locs, None)
self.locs = locs
self.nbins = None


class StrCategoryFormatter(ticker.FixedFormatter):
class StrCategoryFormatter(mticker.FixedFormatter):
"""
Labels every category by subclassing
:class:`~matplotlib.ticker.FixedFormatter`
"""
def __init__(self, seq):
super(StrCategoryFormatter, self).__init__(seq)
self.seq = seq
self.offset_string = ''


class CategoryNorm(mcolors.Normalize):
    """
    Preserves ordering of discrete values
    """
    def __init__(self, data):
        """
        *data*
            distinct values for mapping

        Values that are not part of the mapping are masked in the
        array returned by :meth:`__call__`.
        """
        self.units = StrCategoryConverter()
        self.unit_data = None
        # default_units stores the category mapping on self.unit_data
        self.units.default_units(data,
                                 self, sort=False)
        self.loc2seq = dict(zip(self.unit_data.locs, self.unit_data.seq))
        # initialise the base class so that vmin, vmax and clip all exist
        super(CategoryNorm, self).__init__(vmin=min(self.unit_data.locs),
                                           vmax=max(self.unit_data.locs),
                                           clip=False)

    def _scale(self):
        """Return the divisor used to normalize locations.

        Falls back to 1 when vmax is 0 (e.g. a single category) so that
        normalizing never divides by zero and stays invertible.
        """
        return self.vmax if self.vmax else 1

    def __call__(self, value, clip=None):
        """Map *value* to normalized locations, masking unknown values.

        *clip* is accepted for interface compatibility and is not used.
        """
        # gonna have to go into imshow and undo casting
        value = np.asarray(value, dtype=int)
        ret = self.units.convert(value, None, self)
        # knock out values not in the norm
        mask = np.in1d(ret, self.unit_data.locs).reshape(ret.shape)
        # normalize ret & locs
        ret /= self._scale()
        return np.ma.array(ret, mask=~mask)

    def inverse(self, value):
        """Return the list of categories for normalized *value*.

        *value* may be a scalar or an iterable of normalized values.
        """
        # atleast_1d lets a scalar be iterated like a sequence
        vscaled = np.atleast_1d(value) * self._scale()
        # round before truncating so float round-off (e.g. 2.9999999)
        # does not select the neighbouring category
        return [self.loc2seq[int(round(vs))] for vs in vscaled]


def colors_from_categories(codings):
    """
    Helper routine to generate a cmap and a norm from a list
    of (value, color) pairs

    Parameters
    ----------
    codings : dict or sequence of (value, color) pairs
        The first item of each pair is the category value and the
        second is the color assigned to it.  For a dict, the keys are
        the category values and the dict values are the colors.

    Returns
    -------
    (cmap, norm) : tuple containing a :class:`Colormap` and a
        :class:`Normalize` instance
    """
    if isinstance(codings, dict):
        codings = cbook.sanitize_sequence(codings.items())
    # each pair is unpacked as (category value, color)
    values, colors = zip(*codings)
    cmap = mcolors.ListedColormap(list(colors))
    norm = CategoryNorm(list(values))
    return cmap, norm


def convert_to_string(value):
"""Helper function for numpy 1.6, can be replaced with
np.array(...,dtype=unicode) for all later versions of numpy"""

if isinstance(value, six.string_types):
return value
if np.isfinite(value):
pass
elif np.isfinite(value):
value = np.asarray(value, dtype=str)[np.newaxis][0]
elif np.isnan(value):
value = 'nan'
Expand All @@ -91,61 +186,53 @@ def convert_to_string(value):
return value


def map_categories(data, old_map=None):
"""Create mapping between unique categorical
values and numerical identifier.

Paramters
---------
data: iterable
sequence of values
old_map: list of tuple, optional
if not `None`, than old_mapping will be updated with new values and
previous mappings will remain unchanged)
sort: bool, optional
sort keys by ASCII value

Returns
-------
list of tuple
[(label, ticklocation),...]

"""

# code typical missing data in the negative range because
# everything else will always have positive encoding
# question able if it even makes sense
class UnitData(object):
# debatable if it makes sense to special code missing values
spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}

if isinstance(data, six.string_types):
data = [data]

# will update this post cbook/dict support
strdata = to_array(data)
uniq = np.unique(strdata)

if old_map:
olabs, okeys = zip(*old_map)
svalue = max(okeys) + 1
else:
old_map, olabs, okeys = [], [], []
svalue = 0

category_map = old_map[:]

new_labs = [u for u in uniq if u not in olabs]
missing = [nl for nl in new_labs if nl in spdict.keys()]

category_map.extend([(m, spdict[m]) for m in missing])

new_labs = [nl for nl in new_labs if nl not in missing]

new_locs = np.arange(svalue, svalue + len(new_labs), dtype='float')
category_map.extend(list(zip(new_labs, new_locs)))
return category_map

def __init__(self, data, sort=True):
    """Build the mapping between unique categorical values
    and their numerical identifiers

    Parameters
    ----------
    data: iterable
        sequence of values
    sort: bool
        sort input data, default is True
        False preserves input order
    """
    # seq holds the category labels, locs the matching numeric locations
    self.seq = []
    self.locs = []
    self.sort = sort
    # numbering of ordinary categories starts at 0
    self._set_seq_locs(data, 0, sort)

def update(self, new_data, sort=True):
    """Add the categories in *new_data* to the existing mapping.

    Parameters
    ----------
    new_data: iterable
        sequence of values; categories already mapped are left unchanged
    sort: bool
        a true value is stored on ``self.sort``; a false value leaves
        the stored setting untouched
    """
    if sort:
        self.sort = sort
    # so as not to conflict with spdict; an empty mapping starts at 0
    # instead of failing on max() of an empty list
    value = max(max(self.locs) + 1, 0) if self.locs else 0
    self._set_seq_locs(new_data, value, self.sort)

def _set_seq_locs(self, data, value, sort):
    """Append unseen categories in *data* to ``self.seq``/``self.locs``.

    Parameters
    ----------
    data: iterable
        sequence of values
    value: number
        location assigned to the first new ordinary category; it is
        incremented by one for each further one
    sort: bool
        if true new categories are added in sorted order, otherwise in
        order of first appearance
    """
    # magic to make it work under np1.6
    # ravel so scalar (0-d) and nested input can be indexed with the
    # flat indices that np.unique returns
    strdata = np.ravel(to_array(data))

    # np.unique makes dataframes work
    if sort:
        unq = np.unique(strdata)
    else:
        # indices of first occurrences, re-sorted to keep input order
        _, idx = np.unique(strdata, return_index=True)
        unq = strdata[np.sort(idx)]

    new_s = [d for d in unq if d not in self.seq]
    for ns in new_s:
        self.seq.append(convert_to_string(ns))
        if ns in UnitData.spdict:
            # missing-value markers get their reserved negative location
            self.locs.append(UnitData.spdict[ns])
        else:
            self.locs.append(value)
            value += 1

# Connects the converter to matplotlib
units.registry[str] = StrCategoryConverter()
units.registry[bytes] = StrCategoryConverter()
units.registry[six.text_type] = StrCategoryConverter()
munits.registry[str] = StrCategoryConverter()
munits.registry[bytes] = StrCategoryConverter()
munits.registry[six.text_type] = StrCategoryConverter()
Loading