Correctly end tokens in mathtext parsing.

anntzer · anntzer · commit 483dce2b893a · 2022-05-15T20:28:02.000+02:00
This avoids parsing `\sinx` as `\sin x` (it now raises an error
instead), and removes the need for `accentprefixed` (because `\doteq`
is treated as a single token now, instead of `\dot{eq}`).  This also
means that `\doteq` (and friends) are now correctly treated as relations
(per `_relation_symbols`, thus changing the spacing around them); hence
the change in baseline images.  Only keep the `x \doteq y` baseline
(and adjust the test string to undo the spacing), to avoid regenerating
baselines.

Also shaves ~2% off the time to draw all the current mathtext tests, as measured by
```
MPLBACKEND=agg python -c 'import time; from pylab import *; from matplotlib.tests.test_mathtext import math_tests; fig = figure(figsize=(3, 10)); fig.text(0, 0, "\n".join(filter(None, math_tests)), size=6); start = time.perf_counter(); [fig.canvas.draw() for _ in range(10)]; print((time.perf_counter() - start) / 10)'
```
(after adjusting for the two removed test cases), probably because
`accentprefixed` was previously checked extremely often, being at the
top of the `placeable` list; however, performance wasn't really the main
goal here.
diff --git a/lib/matplotlib/_mathtext.py b/lib/matplotlib/_mathtext.py
@@ -1708,10 +1708,24 @@ def set_names_and_parse_actions():
 
         # Root definitions.
 
+        # In TeX parlance, a csname is a control sequence name (a "\foo").
+        def csnames(group, names):
+            ends_with_alpha = []
+            ends_with_nonalpha = []
+            for name in names:
+                if name[-1].isalpha():
+                    ends_with_alpha.append(name)
+                else:
+                    ends_with_nonalpha.append(name)
+            return Regex(r"\\(?P<{}>(?:{})(?![A-Za-z]){})".format(
+                group,
+                "|".join(map(re.escape, ends_with_alpha)),
+                "".join(f"|{s}" for s in map(re.escape, ends_with_nonalpha)),
+            ))
+
         p.float_literal  = Regex(r"[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)")
         p.space          = oneOf(self._space_widths)("space")
 
-        p.accentprefixed = "\\" + oneOf(self._accentprefixed)("sym")
         p.symbol         = Regex(
             r"[a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|\U00000080-\U0001ffff]"
             r"|\\[%${}\[\]_|]"
@@ -1720,7 +1734,7 @@ def set_names_and_parse_actions():
         )("sym").leaveWhitespace()
         p.unknown_symbol = Regex(r"\\[A-Za-z]*")("name")
 
-        p.font           = "\\" + oneOf(self._fontnames)("font")
+        p.font           = csnames("font", self._fontnames)
         p.start_group    = (
             Optional(r"\math" + oneOf(self._fontnames)("font")) + "{")
         p.end_group      = Literal("}")
@@ -1761,11 +1775,10 @@ def set_names_and_parse_actions():
             | Error(r"Expected \hspace{n}"))
 
         p.accent        <<= (
-            "\\"
-            + oneOf([*self._accent_map, *self._wide_accents])("accent")
+            csnames("accent", [*self._accent_map, *self._wide_accents])
             - p.placeable("sym"))
 
-        p.function      <<= "\\" + oneOf(self._function_names)("name")
+        p.function      <<= csnames("name", self._function_names)
         p.operatorname  <<= r"\operatorname" - (
             "{" + ZeroOrMore(p.simple | p.unknown_symbol)("name") + "}"
             | Error(r"Expected \operatorname{name}"))
@@ -1813,10 +1826,8 @@ def set_names_and_parse_actions():
             | Error(r"Expected \underset{annotation}{body}"))
 
         p.placeable     <<= (
-            p.accentprefixed  # Must be before accent so named symbols that are
-                              # prefixed with an accent name work
-            | p.accent   # Must be before symbol as all accents are symbols
-            | p.symbol   # Must be third to catch all named symbols and single
+            p.accent     # Must be before symbol as all accents are symbols
+            | p.symbol   # Must be second to catch all named symbols and single
                          # chars not in a group
             | p.function
             | p.operatorname
@@ -2004,8 +2015,6 @@ def symbol(self, s, loc, toks):
                 return [Hlist([char, self._make_space(0.2)], do_kern=True)]
         return [char]
 
-    accentprefixed = symbol
-
     def unknown_symbol(self, s, loc, toks):
         raise ParseFatalException(s, loc, f"Unknown symbol: {toks['name']}")
 
@@ -2034,12 +2043,6 @@ def unknown_symbol(self, s, loc, toks):
 
     _wide_accents = set(r"widehat widetilde widebar".split())
 
-    # make a lambda and call it to get the namespace right
-    _accentprefixed = (lambda am: [
-        p for p in tex2uni
-        if any(p.startswith(a) and a != p for a in am)
-    ])(set(_accent_map))
-
     def accent(self, s, loc, toks):
         state = self.get_state()
         thickness = state.get_current_underline_thickness()
diff --git a/lib/matplotlib/tests/test_mathtext.py b/lib/matplotlib/tests/test_mathtext.py
@@ -16,7 +16,7 @@
 # If test is removed, use None as placeholder
 math_tests = [
     r'$a+b+\dot s+\dot{s}+\ldots$',
-    r'$x \doteq y$',
+    r'$x\hspace{-0.2}\doteq\hspace{-0.2}y$',
     r'\$100.00 $\alpha \_$',
     r'$\frac{\$100.00}{y}$',
     r'$x   y$',
@@ -104,12 +104,12 @@
     r'$\mathring{A}  \AA$',
     r'$M \, M \thinspace M \/ M \> M \: M \; M \ M \enspace M \quad M \qquad M \! M$',
     r'$\Cap$ $\Cup$ $\leftharpoonup$ $\barwedge$ $\rightharpoonup$',
-    r'$\dotplus$ $\doteq$ $\doteqdot$ $\ddots$',
+    None,
     r'$xyz^kx_kx^py^{p-2} d_i^jb_jc_kd x^j_i E^0 E^0_u$',  # github issue #4873
     r'${xyz}^k{x}_{k}{x}^{p}{y}^{p-2} {d}_{i}^{j}{b}_{j}{c}_{k}{d} {x}^{j}_{i}{E}^{0}{E}^0_u$',
     r'${\int}_x^x x\oint_x^x x\int_{X}^{X}x\int_x x \int^x x \int_{x} x\int^{x}{\int}_{x} x{\int}^{x}_{x}x$',
     r'testing$^{123}$',
-    ' '.join('$\\' + p + '$' for p in sorted(_mathtext.Parser._accentprefixed)),
+    None,
     r'$6-2$; $-2$; $ -2$; ${-2}$; ${  -2}$; $20^{+3}_{-2}$',
     r'$\overline{\omega}^x \frac{1}{2}_0^x$',  # github issue #5444
     r'$,$ $.$ $1{,}234{, }567{ , }890$ and $1,234,567,890$',  # github issue 5799
@@ -223,6 +223,19 @@ def test_mathfont_rendering(baseline_images, fontset, index, text):
              horizontalalignment='center', verticalalignment='center')
 
 
+@check_figures_equal(extensions=["png"])
+def test_short_long_accents(fig_test, fig_ref):
+    acc_map = _mathtext.Parser._accent_map
+    short_accs = [s for s in acc_map if len(s) == 1]
+    corresponding_long_accs = []
+    for s in short_accs:
+        l, = [l for l in acc_map if len(l) > 1 and acc_map[l] == acc_map[s]]
+        corresponding_long_accs.append(l)
+    fig_test.text(0, .5, "$" + "".join(rf"\{s}a" for s in short_accs) + "$")
+    fig_ref.text(
+        0, .5, "$" + "".join(fr"\{l} a" for l in corresponding_long_accs) + "$")
+
+
 def test_fontinfo():
     fontpath = mpl.font_manager.findfont("DejaVu Sans")
     font = mpl.ft2font.FT2Font(fontpath)
@@ -235,6 +248,7 @@ def test_fontinfo():
     [
         (r'$\hspace{}$', r'Expected \hspace{n}'),
         (r'$\hspace{foo}$', r'Expected \hspace{n}'),
+        (r'$\sinx$', r'Unknown symbol: \sinx'),
         (r'$\frac$', r'Expected \frac{num}{den}'),
         (r'$\frac{}{}$', r'Expected \frac{num}{den}'),
         (r'$\binom$', r'Expected \binom{num}{den}'),
@@ -265,6 +279,7 @@ def test_fontinfo():
     ids=[
         'hspace without value',
         'hspace with invalid value',
+        'function without space',
         'frac without parameters',
         'frac with empty parameters',
         'binom without parameters',