diff --git a/lib/matplotlib/_mathtext.py b/lib/matplotlib/_mathtext.py index 3feb70f59736..88619f2485a8 100644 --- a/lib/matplotlib/_mathtext.py +++ b/lib/matplotlib/_mathtext.py @@ -12,9 +12,9 @@ import numpy as np from pyparsing import ( - Combine, Empty, Forward, Group, Literal, oneOf, OneOrMore, - Optional, ParseBaseException, ParseFatalException, ParserElement, - ParseResults, QuotedString, Regex, StringEnd, Suppress, White, ZeroOrMore) + Empty, Forward, NotAny, oneOf, OneOrMore, Optional, ParseBaseException, + ParseFatalException, ParserElement, ParseResults, QuotedString, Regex, + StringEnd, ZeroOrMore, pyparsing_common) import matplotlib as mpl from . import _api, cbook @@ -1948,10 +1948,8 @@ def __init__(self): # All forward declarations are here p.accent = Forward() p.ambi_delim = Forward() - p.apostrophe = Forward() p.auto_delim = Forward() p.binom = Forward() - p.bslash = Forward() p.customspace = Forward() p.end_group = Forward() p.float_literal = Forward() @@ -1961,11 +1959,7 @@ def __init__(self): p.function = Forward() p.genfrac = Forward() p.group = Forward() - p.int_literal = Forward() - p.latexfont = Forward() - p.lbracket = Forward() p.left_delim = Forward() - p.lbrace = Forward() p.main = Forward() p.math = Forward() p.math_string = Forward() @@ -1974,11 +1968,8 @@ def __init__(self): p.overline = Forward() p.overset = Forward() p.placeable = Forward() - p.rbrace = Forward() - p.rbracket = Forward() p.required_group = Forward() p.right_delim = Forward() - p.right_delim_safe = Forward() p.simple = Forward() p.simple_group = Forward() p.single_symbol = Forward() @@ -1987,142 +1978,99 @@ def __init__(self): p.sqrt = Forward() p.start_group = Forward() p.subsuper = Forward() - p.subsuperop = Forward() p.symbol = Forward() p.symbol_name = Forward() p.token = Forward() p.underset = Forward() p.unknown_symbol = Forward() - # Set names on everything -- very useful for debugging for key, val in vars(p).items(): if not key.startswith('_'): + # Set names on everything -- very useful for debugging val.setName(key) + # Set actions + if hasattr(self, key): + val.setParseAction(getattr(self, key)) p.float_literal <<= Regex(r"[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)") - p.int_literal <<= Regex("[-+]?[0-9]+") - - p.lbrace <<= Literal('{').suppress() - p.rbrace <<= Literal('}').suppress() - p.lbracket <<= Literal('[').suppress() - p.rbracket <<= Literal(']').suppress() - p.bslash <<= Literal('\\') - - p.space <<= oneOf(list(self._space_widths)) - p.customspace <<= ( - Suppress(Literal(r'\hspace')) - - ((p.lbrace + p.float_literal + p.rbrace) - | Error(r"Expected \hspace{n}")) - ) - unicode_range = "\U00000080-\U0001ffff" + p.space <<= oneOf(self._space_widths)("space") + p.customspace <<= r"\hspace" - ( + "{" + p.float_literal("space") + "}" + | Error(r"Expected \hspace{n}")) + p.single_symbol <<= Regex( r"([a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|%s])|(\\[%%${}\[\]_|])" % - unicode_range) - p.accentprefixed <<= Suppress(p.bslash) + oneOf(self._accentprefixed) + "\U00000080-\U0001ffff" # unicode range + )("sym") + p.accentprefixed <<= "\\" + oneOf(self._accentprefixed)("sym") p.symbol_name <<= ( - Combine(p.bslash + oneOf(list(tex2uni))) - + Suppress(Regex("(?=[^A-Za-z]|$)").leaveWhitespace()) - ) + oneOf([rf"\{sym}" for sym in tex2uni])("sym") + + Regex("(?=[^A-Za-z]|$)").leaveWhitespace()) p.symbol <<= (p.single_symbol | p.symbol_name).leaveWhitespace() - p.apostrophe <<= Regex("'+") - - p.accent <<= Group( - Suppress(p.bslash) - + oneOf([*self._accent_map, *self._wide_accents]) - + Suppress(Optional(White())) - - p.placeable - ) - - p.function <<= ( - Suppress(p.bslash) - + oneOf(list(self._function_names)) - ) - - p.start_group <<= Optional(p.latexfont) + p.lbrace - p.end_group <<= p.rbrace.copy() - p.simple_group <<= Group(p.lbrace + ZeroOrMore(p.token) + p.rbrace) - p.required_group <<= Group(p.lbrace + OneOrMore(p.token) + p.rbrace) - p.group <<= Group( - p.start_group + ZeroOrMore(p.token) + p.end_group - ) - - p.font <<= Suppress(p.bslash) + oneOf(list(self._fontnames)) - p.latexfont <<= ( - Suppress(p.bslash) - + oneOf(['math' + x for x in self._fontnames]) - ) - - p.frac <<= Group( - Suppress(Literal(r"\frac")) - - ((p.required_group + p.required_group) - | Error(r"Expected \frac{num}{den}")) - ) - - p.dfrac <<= Group( - Suppress(Literal(r"\dfrac")) - - ((p.required_group + p.required_group) - | Error(r"Expected \dfrac{num}{den}")) - ) - - p.binom <<= Group( - Suppress(Literal(r"\binom")) - - ((p.required_group + p.required_group) - | Error(r"Expected \binom{num}{den}")) - ) - - p.ambi_delim <<= oneOf(list(self._ambi_delim)) - p.left_delim <<= oneOf(list(self._left_delim)) - p.right_delim <<= oneOf(list(self._right_delim)) - p.right_delim_safe <<= oneOf([*(self._right_delim - {'}'}), r'\}']) - - p.genfrac <<= Group( - Suppress(Literal(r"\genfrac")) - - (((p.lbrace - + Optional(p.ambi_delim | p.left_delim, default='') - + p.rbrace) - + (p.lbrace - + Optional(p.ambi_delim | p.right_delim_safe, default='') - + p.rbrace) - + (p.lbrace + p.float_literal + p.rbrace) - + p.simple_group + p.required_group + p.required_group) - | Error("Expected " - r"\genfrac{ldelim}{rdelim}{rulesize}{style}{num}{den}")) - ) - - p.sqrt <<= Group( - Suppress(Literal(r"\sqrt")) - - ((Group(Optional( - p.lbracket + OneOrMore(~p.rbracket + p.token) + p.rbracket)) - + p.required_group) - | Error("Expected \\sqrt{value}")) - ) - - p.overline <<= Group( - Suppress(Literal(r"\overline")) - - (p.required_group | Error("Expected \\overline{value}")) - ) - - p.overset <<= Group( - Suppress(Literal(r"\overset")) - - ((p.simple_group + p.simple_group) - | Error("Expected \\overset{body}{annotation}")) - ) - - p.underset <<= Group( - Suppress(Literal(r"\underset")) - - ((p.simple_group + p.simple_group) - | Error("Expected \\underset{body}{annotation}")) - ) - - p.unknown_symbol <<= Combine(p.bslash + Regex("[A-Za-z]*")) - - p.operatorname <<= Group( - Suppress(Literal(r"\operatorname")) - - ((p.lbrace + ZeroOrMore(p.simple | p.unknown_symbol) + p.rbrace) - | Error("Expected \\operatorname{value}")) - ) + p.accent <<= ( + "\\" + + oneOf([*self._accent_map, *self._wide_accents])("accent") + - p.placeable("sym")) + + p.function <<= "\\" + oneOf(self._function_names)("name") + p.operatorname <<= r"\operatorname" - ( + "{" + ZeroOrMore(p.simple | p.unknown_symbol)("name") + "}" + | Error(r"Expected \operatorname{name}")) + + p.start_group <<= ( + Optional(r"\math" + oneOf(self._fontnames)("font")) + "{") + p.end_group <<= "}" + p.group <<= ( + p.start_group + ZeroOrMore(p.token)("group") + p.end_group) + + p.font <<= "\\" + oneOf(self._fontnames)("font") + + p.simple_group <<= "{" + ZeroOrMore(p.token)("group") + "}" + p.required_group <<= "{" + OneOrMore(p.token)("group") + "}" + + p.frac <<= r"\frac" - ( + p.required_group("num") + p.required_group("den") + | Error(r"Expected \frac{num}{den}")) + p.dfrac <<= r"\dfrac" - ( + p.required_group("num") + p.required_group("den") + | Error(r"Expected \dfrac{num}{den}")) + p.binom <<= r"\binom" - ( + p.required_group("num") + p.required_group("den") + | Error(r"Expected \binom{num}{den}")) + + p.ambi_delim <<= oneOf(self._ambi_delim) + p.left_delim <<= oneOf(self._left_delim) + p.right_delim <<= oneOf(self._right_delim) + + p.genfrac <<= r"\genfrac" - ( + "{" + Optional(p.ambi_delim | p.left_delim)("ldelim") + "}" + + "{" + Optional(p.ambi_delim | p.right_delim)("rdelim") + "}" + + "{" + p.float_literal("rulesize") + "}" + + p.simple_group("style") + + p.required_group("num") + + p.required_group("den") + | Error("Expected " + r"\genfrac{ldelim}{rdelim}{rulesize}{style}{num}{den}")) + + p.sqrt <<= r"\sqrt" - ( + Optional("[" + OneOrMore(NotAny("]") + p.token)("root") + "]") + + p.required_group("value") + | Error(r"Expected \sqrt{value}")) + + p.overline <<= r"\overline" - ( + p.required_group("body") + | Error(r"Expected \overline{value}")) + + p.overset <<= r"\overset" - ( + p.simple_group("annotation") + p.simple_group("body") + | Error(r"Expected \overset{annotation}{body}")) + p.underset <<= r"\underset" - ( + p.simple_group("annotation") + p.simple_group("body") + | Error(r"Expected \underset{annotation}{body}")) + + p.unknown_symbol <<= Regex(r"\\[A-Za-z]*")("name") p.placeable <<= ( p.accentprefixed # Must be before accent so named symbols that are @@ -2131,6 +2079,7 @@ def __init__(self): | p.symbol # Must be third to catch all named symbols and single # chars not in a group | p.function + | p.operatorname | p.group | p.frac | p.dfrac @@ -2140,7 +2089,6 @@ def __init__(self): | p.underset | p.sqrt | p.overline - | p.operatorname ) p.simple <<= ( @@ -2150,14 +2098,12 @@ def __init__(self): | p.subsuper ) - p.subsuperop <<= oneOf(["_", "^"]) - - p.subsuper <<= Group( - (Optional(p.placeable) - + OneOrMore(p.subsuperop - p.placeable) - + Optional(p.apostrophe)) - | (p.placeable + Optional(p.apostrophe)) - | p.apostrophe + p.subsuper <<= ( + (Optional(p.placeable)("nucleus") + + OneOrMore(oneOf(["_", "^"]) - p.placeable)("subsuper") + + Regex("'*")("apostrophes")) + | Regex("'+")("apostrophes") + | (p.placeable("nucleus") + Regex("'*")("apostrophes")) ) p.token <<= ( @@ -2167,12 +2113,12 @@ def __init__(self): ) p.auto_delim <<= ( - Suppress(Literal(r"\left")) - - ((p.left_delim | p.ambi_delim) + r"\left" + - ((p.left_delim | p.ambi_delim)("left") | Error("Expected a delimiter")) - + Group(ZeroOrMore(p.simple | p.auto_delim)) - + Suppress(Literal(r"\right")) - - ((p.right_delim | p.ambi_delim) + + ZeroOrMore(p.simple | p.auto_delim)("mid") + + r"\right" + - ((p.right_delim | p.ambi_delim)("right") | Error("Expected a delimiter")) ) @@ -2186,12 +2132,6 @@ def __init__(self): p.non_math + ZeroOrMore(p.math_string + p.non_math) + StringEnd() ) - # Set actions - for key, val in vars(p).items(): - if not key.startswith('_'): - if hasattr(self, key): - val.setParseAction(getattr(self, key)) - self._expression = p.main self._math_expression = p.math @@ -2290,6 +2230,8 @@ def non_math(self, s, loc, toks): self.get_state().font = mpl.rcParams['mathtext.default'] return [hlist] + float_literal = staticmethod(pyparsing_common.convertToFloat) + def _make_space(self, percentage): # All spaces are relative to em width state = self.get_state() @@ -2319,16 +2261,15 @@ def _make_space(self, percentage): } def space(self, s, loc, toks): - tok, = toks - num = self._space_widths[tok] + num = self._space_widths[toks["space"]] box = self._make_space(num) return [box] def customspace(self, s, loc, toks): - return [self._make_space(float(toks[0]))] + return [self._make_space(toks["space"])] def symbol(self, s, loc, toks): - c, = toks + c = toks["sym"] try: char = Char(c, self.get_state()) except ValueError as err: @@ -2368,8 +2309,7 @@ def symbol(self, s, loc, toks): accentprefixed = symbol def unknown_symbol(self, s, loc, toks): - c, = toks - raise ParseFatalException(s, loc, "Unknown symbol: %s" % c) + raise ParseFatalException(s, loc, f"Unknown symbol: {toks['name']}") _accent_map = { r'hat': r'\circumflexaccent', @@ -2405,7 +2345,8 @@ def unknown_symbol(self, s, loc, toks): def accent(self, s, loc, toks): state = self.get_state() thickness = state.get_current_underline_thickness() - (accent, sym), = toks + accent = toks["accent"] + sym = toks["sym"] if accent in self._wide_accents: accent_box = AutoWidthChar( '\\' + accent, sym.width, state, char_class=Accent) @@ -2424,7 +2365,7 @@ def accent(self, s, loc, toks): def function(self, s, loc, toks): hlist = self.operatorname(s, loc, toks) - hlist.function_name, = toks + hlist.function_name = toks["name"] return hlist def operatorname(self, s, loc, toks): @@ -2433,7 +2374,8 @@ def operatorname(self, s, loc, toks): state.font = 'rm' hlist_list = [] # Change the font of Chars, but leave Kerns alone - for c in toks[0]: + name = toks["name"] + for c in name: if isinstance(c, Char): c.font = 'rm' c._update_metrics() @@ -2442,14 +2384,14 @@ def operatorname(self, s, loc, toks): hlist_list.append(Char(c, state)) else: hlist_list.append(c) - next_char_loc = loc + len(toks[0]) + 1 - if isinstance(toks[0], ParseResults): + next_char_loc = loc + len(name) + 1 + if isinstance(name, ParseResults): next_char_loc += len('operatorname{}') next_char = next((c for c in s[next_char_loc:] if c != ' '), '') delimiters = self._left_delim | self._ambi_delim | self._right_delim delimiters |= {'^', '_'} if (next_char not in delimiters and - toks[0] not in self._overunder_functions): + name not in self._overunder_functions): # Add thin space except when followed by parenthesis, bracket, etc. hlist_list += [self._make_space(self._space_widths[r'\,'])] self.pop_state() @@ -2458,22 +2400,25 @@ def operatorname(self, s, loc, toks): def start_group(self, s, loc, toks): self.push_state() # Deal with LaTeX-style font tokens - if len(toks): - self.get_state().font = toks[0][4:] + if toks.get("font"): + self.get_state().font = toks.get("font") return [] def group(self, s, loc, toks): - grp = Hlist(toks[0]) + grp = Hlist(toks.get("group", [])) return [grp] - required_group = simple_group = group + + def required_group(self, s, loc, toks): + return Hlist(toks.get("group", [])) + + simple_group = required_group def end_group(self, s, loc, toks): self.pop_state() return [] def font(self, s, loc, toks): - name, = toks - self.get_state().font = name + self.get_state().font = toks["font"] return [] def is_overunder(self, nucleus): @@ -2497,60 +2442,24 @@ def is_between_brackets(self, s, loc): return False def subsuper(self, s, loc, toks): - assert len(toks) == 1 - - nucleus = None - sub = None - super = None - - # Pick all of the apostrophes out, including first apostrophes that - # have been parsed as characters - napostrophes = 0 - new_toks = [] - for tok in toks[0]: - if isinstance(tok, str) and tok not in ('^', '_'): - napostrophes += len(tok) - elif isinstance(tok, Char) and tok.c == "'": - napostrophes += 1 - else: - new_toks.append(tok) - toks = new_toks - - if len(toks) == 0: - assert napostrophes - nucleus = Hbox(0.0) - elif len(toks) == 1: - if not napostrophes: - return toks[0] # .asList() - else: - nucleus = toks[0] - elif len(toks) in (2, 3): - # single subscript or superscript - nucleus = toks[0] if len(toks) == 3 else Hbox(0.0) - op, next = toks[-2:] + nucleus = toks.get("nucleus", Hbox(0)) + subsuper = toks.get("subsuper", []) + napostrophes = len(toks.get("apostrophes", [])) + + if not subsuper and not napostrophes: + return nucleus + + sub = super = None + while subsuper: + op, arg, *subsuper = subsuper if op == '_': - sub = next - else: - super = next - elif len(toks) in (4, 5): - # subscript and superscript - nucleus = toks[0] if len(toks) == 5 else Hbox(0.0) - op1, next1, op2, next2 = toks[-4:] - if op1 == op2: - if op1 == '_': + if sub is not None: raise ParseFatalException("Double subscript") - else: - raise ParseFatalException("Double superscript") - if op1 == '_': - sub = next1 - super = next2 + sub = arg else: - super = next1 - sub = next2 - else: - raise ParseFatalException( - "Subscript/superscript sequence is too long. " - "Use braces { } to remove ambiguity.") + if super is not None: + raise ParseFatalException("Double superscript") + super = arg state = self.get_state() rule_thickness = state.font_output.get_underline_thickness( @@ -2562,7 +2471,7 @@ def subsuper(self, s, loc, toks): if super is None: super = Hlist([]) for i in range(napostrophes): - super.children.extend(self.symbol(s, loc, ['\\prime'])) + super.children.extend(self.symbol(s, loc, {"sym": "\\prime"})) # kern() and hpack() needed to get the metrics right after # extending super.kern() @@ -2691,8 +2600,6 @@ def _genfrac(self, ldelim, rdelim, rule, style, num, den): state = self.get_state() thickness = state.get_current_underline_thickness() - rule = float(rule) - if style is not self._MathStyle.DISPLAYSTYLE: num.shrink() den.shrink() @@ -2728,28 +2635,29 @@ def _genfrac(self, ldelim, rdelim, rule, style, num, den): return result def genfrac(self, s, loc, toks): - args, = toks - return self._genfrac(*args) + return self._genfrac( + toks.get("ldelim", ""), toks.get("rdelim", ""), + toks["rulesize"], toks["style"], + toks["num"], toks["den"]) def frac(self, s, loc, toks): - thickness = self.get_state().get_current_underline_thickness() - (num, den), = toks - return self._genfrac('', '', thickness, self._MathStyle.TEXTSTYLE, - num, den) + return self._genfrac( + "", "", self.get_state().get_current_underline_thickness(), + self._MathStyle.TEXTSTYLE, toks["num"], toks["den"]) def dfrac(self, s, loc, toks): - thickness = self.get_state().get_current_underline_thickness() - (num, den), = toks - return self._genfrac('', '', thickness, self._MathStyle.DISPLAYSTYLE, - num, den) + return self._genfrac( + "", "", self.get_state().get_current_underline_thickness(), + self._MathStyle.DISPLAYSTYLE, toks["num"], toks["den"]) def binom(self, s, loc, toks): - (num, den), = toks - return self._genfrac('(', ')', 0.0, self._MathStyle.TEXTSTYLE, - num, den) + return self._genfrac( + "(", ")", 0, + self._MathStyle.TEXTSTYLE, toks["num"], toks["den"]) def _genset(self, s, loc, toks): - (annotation, body), = toks + annotation = toks["annotation"] + body = toks["body"] thickness = self.get_state().get_current_underline_thickness() annotation.shrink() @@ -2780,7 +2688,8 @@ def _genset(self, s, loc, toks): overset = underset = _genset def sqrt(self, s, loc, toks): - (root, body), = toks + root = toks.get("root") + body = toks["value"] state = self.get_state() thickness = state.get_current_underline_thickness() @@ -2819,7 +2728,7 @@ def sqrt(self, s, loc, toks): return [hlist] def overline(self, s, loc, toks): - (body,), = toks + body = toks["body"] state = self.get_state() thickness = state.get_current_underline_thickness() @@ -2860,6 +2769,5 @@ def _auto_sized_delimiter(self, front, middle, back): return hlist def auto_delim(self, s, loc, toks): - front, middle, back = toks - - return self._auto_sized_delimiter(front, middle.asList(), back) + return self._auto_sized_delimiter( + toks["left"], toks["mid"].asList(), toks["right"]) diff --git a/lib/matplotlib/tests/test_mathtext.py b/lib/matplotlib/tests/test_mathtext.py index 9f196a29216f..abe023495953 100644 --- a/lib/matplotlib/tests/test_mathtext.py +++ b/lib/matplotlib/tests/test_mathtext.py @@ -255,13 +255,12 @@ def test_fontinfo(): (r'$\left($', re.compile(r'Expected ("|\'\\)\\right["\']')), (r'$\dfrac$', r'Expected \dfrac{num}{den}'), (r'$\dfrac{}{}$', r'Expected \dfrac{num}{den}'), - (r'$\overset$', r'Expected \overset{body}{annotation}'), - (r'$\underset$', r'Expected \underset{body}{annotation}'), + (r'$\overset$', r'Expected \overset{annotation}{body}'), + (r'$\underset$', r'Expected \underset{annotation}{body}'), (r'$\foo$', r'Unknown symbol: \foo'), (r'$a^2^2$', r'Double superscript'), (r'$a_2_2$', r'Double subscript'), - (r'$a^2_a^2$', r'Subscript/superscript sequence is too long.'\ - r' Use braces { } to remove ambiguity.'), + (r'$a^2_a^2$', r'Double superscript'), ], ids=[ 'hspace without value',