From cb58589806ca5a6f73d0ee3d5eb9e9ee354ae757 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 3 Apr 2018 10:09:29 -0700 Subject: [PATCH] Cache various dviread constructs globally. Previously, caching was done at the level of the renderer, so new renderers would have to reconstruct the PsfontsMap and Adobe encoding tables. Using a global cache greatly improves the performance: something like rcdefaults() gca().text(.5, .5, "$foo$", usetex=True) %timeit savefig("/tmp/test.svg") goes from ~187ms to ~37ms. %timeit savefig("/tmp/test.pdf") goes from ~124ms to ~53ms. Also moves TextToPath's _get_ps_font_map_and_encoding to use a standard lru_cache. --- .../2018-02-15-AL-deprecations.rst | 14 +- lib/matplotlib/backends/backend_pdf.py | 10 +- lib/matplotlib/dviread.py | 28 ++-- lib/matplotlib/texmanager.py | 4 - lib/matplotlib/textpath.py | 121 ++++++++---------- 5 files changed, 83 insertions(+), 94 deletions(-) diff --git a/doc/api/next_api_changes/2018-02-15-AL-deprecations.rst b/doc/api/next_api_changes/2018-02-15-AL-deprecations.rst index 5d799c5587ec..27e189eb92b4 100644 --- a/doc/api/next_api_changes/2018-02-15-AL-deprecations.rst +++ b/doc/api/next_api_changes/2018-02-15-AL-deprecations.rst @@ -11,8 +11,12 @@ The following modules are deprecated: The following classes, methods, functions, and attributes are deprecated: - ``afm.parse_afm``, +- ``backend_pdf.PdfFile.texFontMap``, - ``backend_pgf.get_texcommand``, - ``backend_ps.get_bbox``, +- ``backend_qt5.FigureCanvasQT.keyAutoRepeat`` (directly check + ``event.guiEvent.isAutoRepeat()`` in the event handler to decide whether to + handle autorepeated key presses). - ``backend_qt5.error_msg_qt``, ``backend_qt5.exception_handler``, - ``backend_wx.FigureCanvasWx.macros``, - ``cbook.GetRealpathAndStat``, ``cbook.Locked``, @@ -22,7 +26,11 @@ The following classes, methods, functions, and attributes are deprecated: - ``contour.ContourLabeler.cl``, ``.cl_xy``, and ``.cl_cvalues``, - ``dates.DateFormatter.strftime_pre_1900``, ``dates.DateFormatter.strftime``, - ``font_manager.TempCache``, +- ``image._ImageBase.iterpnames``, use the ``interpolation_names`` property + instead. (this affects classes that inherit from ``_ImageBase`` including + :class:`FigureImage`, :class:`BboxImage`, and :class:`AxesImage`), - ``mathtext.unichr_safe`` (use ``chr`` instead), +- ``patches.Polygon.xy``, - ``table.Table.get_child_artists`` (use ``get_children`` instead), - ``testing.compare.ImageComparisonTest``, ``testing.compare.compare_float``, - ``testing.decorators.CleanupTest``, @@ -30,13 +38,9 @@ The following classes, methods, functions, and attributes are deprecated: - ``FigureCanvasQT.keyAutoRepeat`` (directly check ``event.guiEvent.isAutoRepeat()`` in the event handler to decide whether to handle autorepeated key presses). -- ``FigureCanvasWx.macros``, -- ``_ImageBase.iterpnames``, use the ``interpolation_names`` property instead. - (this affects classes that inherit from ``_ImageBase`` including - :class:`FigureImage`, :class:`BboxImage`, and :class:`AxesImage`), -- ``patches.Polygon.xy``, - ``texmanager.dvipng_hack_alpha``, - ``text.Annotation.arrow``, +- ``textpath.TextToPath.tex_font_map``, The following rcParams are deprecated: - ``pgf.debug`` (the pgf backend relies on logging), diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index aed2795f1145..a8b6a7e8250b 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -655,14 +655,11 @@ def fontName(self, fontprop): return Fx @property + @cbook.deprecated("3.0") def texFontMap(self): # lazy-load texFontMap, it takes a while to parse # and usetex is a relatively rare use case - if self._texFontMap is None: - self._texFontMap = dviread.PsfontsMap( - dviread.find_tex_file('pdftex.map')) - - return self._texFontMap + return dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) def dviFontName(self, dvifont): """ @@ -675,7 +672,8 @@ def dviFontName(self, dvifont): if dvi_info is not None: return dvi_info.pdfname - psfont = self.texFontMap[dvifont.texname] + tex_font_map = dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) + psfont = tex_font_map[dvifont.texname] if psfont.filename is None: raise ValueError( "No usable font file found for {} (TeX: {}); " diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py index a702f5585bc7..448b3011e71f 100644 --- a/lib/matplotlib/dviread.py +++ b/lib/matplotlib/dviread.py @@ -9,14 +9,14 @@ # iterate over pages: for page in dvi: w, h, d = page.width, page.height, page.descent - for x,y,font,glyph,width in page.text: + for x, y, font, glyph, width in page.text: fontname = font.texname pointsize = font.size ... - for x,y,height,width in page.boxes: + for x, y, height, width in page.boxes: ... - """ + from collections import namedtuple import enum from functools import lru_cache, partial, wraps @@ -34,6 +34,10 @@ _log = logging.getLogger(__name__) +# Many dvi related files are looked for by external processes, require +# additional parsing, and are used many times per rendering, which is why they +# are cached using lru_cache(). + # Dvi is a bytecode format documented in # http://mirrors.ctan.org/systems/knuth/dist/texware/dvitype.web # http://texdoc.net/texmf-dist/doc/generic/knuth/texware/dvitype.pdf @@ -808,14 +812,14 @@ class PsfontsMap(object): """ __slots__ = ('_font', '_filename') - def __init__(self, filename): + @lru_cache() + def __new__(cls, filename): + self = object.__new__(cls) self._font = {} - self._filename = filename - if isinstance(filename, bytes): - encoding = sys.getfilesystemencoding() or 'utf-8' - self._filename = filename.decode(encoding, errors='replace') + self._filename = os.fsdecode(filename) with open(filename, 'rb') as file: self._parse(file) + return self def __getitem__(self, texname): assert isinstance(texname, bytes) @@ -956,7 +960,8 @@ def __init__(self, filename): def __iter__(self): yield from self.encoding - def _parse(self, file): + @staticmethod + def _parse(file): result = [] lines = (line.split(b'%', 1)[0].strip() for line in file) @@ -975,6 +980,7 @@ def _parse(self, file): return re.findall(br'/([^][{}<>\s]+)', data) +@lru_cache() def find_tex_file(filename, format=None): """ Find a file in the texmf tree. @@ -1016,10 +1022,6 @@ def find_tex_file(filename, format=None): return result.decode('ascii') -# With multiple text objects per figure (e.g., tick labels) we may end -# up reading the same tfm and vf files many times, so we implement a -# simple cache. TODO: is this worth making persistent? - @lru_cache() def _fontfile(cls, suffix, texname): filename = find_tex_file(texname + suffix) diff --git a/lib/matplotlib/texmanager.py b/lib/matplotlib/texmanager.py index 085a65f49e9a..3858f01c3dc6 100644 --- a/lib/matplotlib/texmanager.py +++ b/lib/matplotlib/texmanager.py @@ -29,17 +29,13 @@ """ import copy -import distutils.version import glob import hashlib import logging import os from pathlib import Path import re -import shutil import subprocess -import sys -import warnings import numpy as np diff --git a/lib/matplotlib/textpath.py b/lib/matplotlib/textpath.py index cd44f1fa0c79..2e7f92500ba7 100644 --- a/lib/matplotlib/textpath.py +++ b/lib/matplotlib/textpath.py @@ -1,20 +1,26 @@ from collections import OrderedDict +import functools import urllib.parse import warnings import numpy as np -from matplotlib.path import Path -from matplotlib import rcParams -import matplotlib.font_manager as font_manager -from matplotlib.ft2font import KERNING_DEFAULT, LOAD_NO_HINTING -from matplotlib.ft2font import LOAD_TARGET_LIGHT -from matplotlib.mathtext import MathTextParser -import matplotlib.dviread as dviread +from matplotlib import cbook, dviread, font_manager, rcParams from matplotlib.font_manager import FontProperties, get_font +from matplotlib.ft2font import ( + KERNING_DEFAULT, LOAD_NO_HINTING, LOAD_TARGET_LIGHT) +from matplotlib.mathtext import MathTextParser +from matplotlib.path import Path from matplotlib.transforms import Affine2D +@functools.lru_cache(1) +def _get_adobe_standard_encoding(): + enc_name = dviread.find_tex_file('8a.enc') + enc = dviread.Encoding(enc_name) + return {c: i for i, c in enumerate(enc.encoding)} + + class TextToPath(object): """ A class that convert a given text to a path using ttf fonts. @@ -25,19 +31,12 @@ class TextToPath(object): def __init__(self): self.mathtext_parser = MathTextParser('path') - self.tex_font_map = None - - from matplotlib.cbook import maxdict - self._ps_fontd = maxdict(50) - self._texmanager = None - self._adobe_standard_encoding = None - - def _get_adobe_standard_encoding(self): - enc_name = dviread.find_tex_file('8a.enc') - enc = dviread.Encoding(enc_name) - return {c: i for i, c in enumerate(enc.encoding)} + @property + @cbook.deprecated("3.0") + def tex_font_map(self): + return dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) def _get_font(self, prop): """ @@ -281,13 +280,6 @@ def get_glyphs_tex(self, prop, s, glyph_map=None, texmanager = self.get_texmanager() - if self.tex_font_map is None: - self.tex_font_map = dviread.PsfontsMap( - dviread.find_tex_file('pdftex.map')) - - if self._adobe_standard_encoding is None: - self._adobe_standard_encoding = self._get_adobe_standard_encoding() - fontsize = prop.get_size_in_points() if hasattr(texmanager, "get_dvi"): dvifilelike = texmanager.get_dvi(s, self.FONT_SCALE) @@ -312,46 +304,7 @@ def get_glyphs_tex(self, prop, s, glyph_map=None, # characters into strings. # oldfont, seq = None, [] for x1, y1, dvifont, glyph, width in page.text: - font_and_encoding = self._ps_fontd.get(dvifont.texname) - font_bunch = self.tex_font_map[dvifont.texname] - - if font_and_encoding is None: - if font_bunch.filename is None: - raise ValueError( - ("No usable font file found for %s (%s). " - "The font may lack a Type-1 version.") - % (font_bunch.psname, dvifont.texname)) - - font = get_font(font_bunch.filename) - - for charmap_name, charmap_code in [("ADOBE_CUSTOM", - 1094992451), - ("ADOBE_STANDARD", - 1094995778)]: - try: - font.select_charmap(charmap_code) - except (ValueError, RuntimeError): - pass - else: - break - else: - charmap_name = "" - warnings.warn("No supported encoding in font (%s)." % - font_bunch.filename) - - if charmap_name == "ADOBE_STANDARD" and font_bunch.encoding: - enc0 = dviread.Encoding(font_bunch.encoding) - enc = {i: self._adobe_standard_encoding.get(c, None) - for i, c in enumerate(enc0.encoding)} - else: - enc = {} - self._ps_fontd[dvifont.texname] = font, enc - - else: - font, enc = font_and_encoding - - ft2font_flag = LOAD_TARGET_LIGHT - + font, enc = self._get_ps_font_and_encoding(dvifont.texname) char_id = self._get_char_id_ps(font, glyph) if char_id not in glyph_map: @@ -362,12 +315,13 @@ def get_glyphs_tex(self, prop, s, glyph_map=None, else: charcode = glyph + ft2font_flag = LOAD_TARGET_LIGHT if charcode is not None: glyph0 = font.load_char(charcode, flags=ft2font_flag) else: warnings.warn("The glyph (%d) of font (%s) cannot be " "converted with the encoding. Glyph may " - "be wrong" % (glyph, font_bunch.filename)) + "be wrong" % (glyph, font.fname)) glyph0 = font.load_char(glyph, flags=ft2font_flag) @@ -391,6 +345,41 @@ def get_glyphs_tex(self, prop, s, glyph_map=None, return (list(zip(glyph_ids, xpositions, ypositions, sizes)), glyph_map_new, myrects) + @staticmethod + @functools.lru_cache(50) + def _get_ps_font_and_encoding(texname): + tex_font_map = dviread.PsfontsMap(dviread.find_tex_file('pdftex.map')) + font_bunch = tex_font_map[texname] + if font_bunch.filename is None: + raise ValueError( + ("No usable font file found for %s (%s). " + "The font may lack a Type-1 version.") + % (font_bunch.psname, texname)) + + font = get_font(font_bunch.filename) + + for charmap_name, charmap_code in [("ADOBE_CUSTOM", 1094992451), + ("ADOBE_STANDARD", 1094995778)]: + try: + font.select_charmap(charmap_code) + except (ValueError, RuntimeError): + pass + else: + break + else: + charmap_name = "" + warnings.warn("No supported encoding in font (%s)." % + font_bunch.filename) + + if charmap_name == "ADOBE_STANDARD" and font_bunch.encoding: + enc0 = dviread.Encoding(font_bunch.encoding) + enc = {i: _get_adobe_standard_encoding().get(c, None) + for i, c in enumerate(enc0.encoding)} + else: + enc = {} + + return font, enc + text_to_path = TextToPath()