From 645c8da7506ef235ba8ca4f17ca13463a4ca6284 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Fri, 12 Jan 2018 16:02:36 +0200
Subject: [PATCH 1/7] Cache kpsewhich results persistently

And allow batching them. This commit does not yet use the batching
but makes it possible.
---
 doc/api/next_api_changes/2018-02-16-JKS.rst   |   8 +
 doc/users/next_whats_new/texsupport_cache.rst |  22 ++
 lib/matplotlib/dviread.py                     | 251 ++++++++++++++++--
 lib/matplotlib/tests/test_dviread.py          |  84 ++++++
 4 files changed, 347 insertions(+), 18 deletions(-)
 create mode 100644 doc/api/next_api_changes/2018-02-16-JKS.rst
 create mode 100644 doc/users/next_whats_new/texsupport_cache.rst

diff --git a/doc/api/next_api_changes/2018-02-16-JKS.rst b/doc/api/next_api_changes/2018-02-16-JKS.rst
new file mode 100644
index 000000000000..f38ad6d50932
--- /dev/null
+++ b/doc/api/next_api_changes/2018-02-16-JKS.rst
@@ -0,0 +1,8 @@
+dviread changes
+---------------
+
+The ``format`` keyword argument to ``dviread.find_tex_file`` has been
+deprecated. Calls that omit the ``format`` argument, as well as calls
+to the new ``dviread.find_tex_files`` function, cache their results in
+``texsupport.N.db`` in the cache directory to speed up dvi file
+processing.
diff --git a/doc/users/next_whats_new/texsupport_cache.rst b/doc/users/next_whats_new/texsupport_cache.rst
new file mode 100644
index 000000000000..b823e962a1d9
--- /dev/null
+++ b/doc/users/next_whats_new/texsupport_cache.rst
@@ -0,0 +1,22 @@
+TeX support cache
+-----------------
+
+The ``usetex`` feature sends snippets of TeX code to LaTeX and related
+external tools for processing. This causes a nontrivial number of
+helper processes to be spawned, which can be slow on some platforms.
+A new cache database helps reduce the need to spawn these helper
+processes, which should improve ``usetex`` processing speed.
+
+The new cache files
+~~~~~~~~~~~~~~~~~~~
+
+The cache database is stored in a file named ``texsupport.N.db`` in the
+standard cache directory (traditionally ``$HOME/.matplotlib`` but
+possibly ``$HOME/.cache/matplotlib``), where ``N`` stands for a version
+number. The version number is incremented when new kinds of items are
+added to the caching code, in order to avoid version clashes when
+using multiple different versions of Matplotlib. The auxiliary files
+``texsupport.N.db-wal`` and ``texsupport.N.db-shm`` help coordinate usage
+of the cache between concurrently running instances. All of these
+cache files may be deleted when Matplotlib is not running, and
+subsequent calls to the ``usetex`` code will recompute the TeX results.
diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
index e0048d8b8c3f..52a66d276e86 100644
--- a/lib/matplotlib/dviread.py
+++ b/lib/matplotlib/dviread.py
@@ -24,12 +24,13 @@
 import os
 import re
 import struct
+import sqlite3
 import sys
 import textwrap
 
 import numpy as np
 
-from matplotlib import cbook, rcParams
+from matplotlib import cbook, get_cachedir, rcParams
 from matplotlib.compat import subprocess
 
 _log = logging.getLogger(__name__)
@@ -980,45 +981,259 @@ def _parse(self, file):
         return re.findall(br'/([^][{}<>\s]+)', data)
 
 
-def find_tex_file(filename, format=None):
+class TeXSupportCacheError(Exception):
+    pass
+
+
+class TeXSupportCache:
+    """A persistent cache of data related to support files related to dvi
+    files produced by TeX. Currently holds results from :program:`kpsewhich`;
+    future versions could also hold pre-parsed font data etc.
+
+    Usage::
+
+      # create or get the singleton instance
+      cache = TeXSupportCache.get_cache()
+      with cache.connection as transaction:
+          cache.update_pathnames(
+              {"pdftex.map": "/usr/local/pdftex.map",
+               "cmsy10.pfb": "/usr/local/fonts/cmsy10.pfb"},
+               transaction)
+      pathnames = cache.get_pathnames(["pdftex.map", "cmr10.pfb"])
+      # now pathnames = {"pdftex.map": "/usr/local/pdftex.map"}
+
+      # optional after inserting new data, may improve query performance:
+      cache.optimize()
+
+    Parameters
+    ----------
+
+    filename : str, optional
+        File in which to store the cache. Defaults to `texsupport.N.db` in
+        the standard cache directory where N is the current schema version.
+
+    Attributes
+    ----------
+
+    connection
+        This database connection object has a context manager to set up
+        a transaction. Transactions are passed into methods that write to
+        the database.
     """
-    Find a file in the texmf tree.
+
+    __slots__ = ('connection')
+    schema_version = 1  # should match PRAGMA user_version in _create
+    instance = None
+
+    @classmethod
+    def get_cache(cls):
+        "Return the singleton instance of the cache, at the default location"
+        if cls.instance is None:
+            cls.instance = cls()
+        return cls.instance
+
+    def __init__(self, filename=None):
+        if filename is None:
+            filename = os.path.join(get_cachedir(), 'texsupport.%d.db'
+                                    % self.schema_version)
+
+        self.connection = sqlite3.connect(
+                filename, isolation_level="DEFERRED")
+        with self.connection as conn:
+            conn.execute("PRAGMA journal_mode=WAL;")
+            version, = conn.execute("PRAGMA user_version;").fetchone()
+
+        if version == 0:
+            self._create()
+        elif version != self.schema_version:
+            raise TeXSupportCacheError(
+                "support database %s has version %d, expected %d"
+                % (filename, version, self.schema_version))
+
+    def _create(self):
+        """Create the database."""
+        with self.connection as conn:
+            conn.executescript(
+                """
+                PRAGMA page_size=4096;
+                CREATE TABLE file_path(
+                    filename TEXT PRIMARY KEY NOT NULL,
+                    pathname TEXT
+                ) WITHOUT ROWID;
+                PRAGMA user_version=1;
+                """)
+
+    def optimize(self):
+        """Optional optimization phase after updating data.
+        Executes sqlite's `PRAGMA optimize` statement, which can call
+        `ANALYZE` or other functions that can improve future query performance
+        by spending some time up-front."""
+        with self.connection as conn:
+            conn.execute("PRAGMA optimize;")
+
+    def get_pathnames(self, filenames):
+        """Query the cache for pathnames related to `filenames`.
+
+        Parameters
+        ----------
+        filenames : iterable of str
+
+        Returns
+        -------
+        mapping from str to (str or None)
+            For those filenames that exist in the cache, the mapping
+            includes either the related pathname or None to indicate that
+            the named file does not exist.
+        """
+        rows = self.connection.execute(
+            "SELECT filename, pathname FROM file_path WHERE filename IN "
+            "(%s)"
+            % ','.join('?' for _ in filenames),
+            filenames).fetchall()
+        return {filename: pathname for (filename, pathname) in rows}
+
+    def update_pathnames(self, mapping, transaction):
+        """Update the cache with the given filename-to-pathname mapping
+
+        Parameters
+        ----------
+        mapping : mapping from str to (str or None)
+            Mapping from filenames to the corresponding full pathnames
+            or None to indicate that the named file does not exist.
+        transaction : obtained via the context manager of self.connection
+        """
+        transaction.executemany(
+            "INSERT OR REPLACE INTO file_path (filename, pathname) "
+            "VALUES (?, ?)",
+            mapping.items())
+
+
+def find_tex_files(filenames, cache=None):
+    """Find multiple files in the texmf tree. This can be more efficient
+    than `find_tex_file` because it makes only one call to `kpsewhich`.
 
     Calls :program:`kpsewhich` which is an interface to the kpathsea
     library [1]_. Most existing TeX distributions on Unix-like systems use
     kpathsea. It is also available as part of MikTeX, a popular
     distribution on Windows.
 
+    The results are cached into the TeX support database. In case of
+    mistaken results, deleting the database resets the cache.
+
     Parameters
     ----------
     filename : string or bytestring
-    format : string or bytestring
-        Used as the value of the `--format` option to :program:`kpsewhich`.
-        Could be e.g. 'tfm' or 'vf' to limit the search to that type of files.
+    cache : TeXSupportCache, optional
+        Cache instance to use, defaults to the singleton instance of the class.
 
     References
     ----------
 
     .. [1] `Kpathsea documentation <http://www.tug.org/kpathsea/>`_
         The library that :program:`kpsewhich` is part of.
+
     """
 
     # we expect these to always be ascii encoded, but use utf-8
     # out of caution
-    if isinstance(filename, bytes):
-        filename = filename.decode('utf-8', errors='replace')
-    if isinstance(format, bytes):
-        format = format.decode('utf-8', errors='replace')
+    filenames = [f.decode('utf-8', errors='replace')
+                 if isinstance(f, bytes) else f
+                 for f in filenames]
+    if cache is None:
+        cache = TeXSupportCache.get_cache()
+    result = cache.get_pathnames(filenames)
+
+    filenames = [f for f in filenames if f not in result]
+    if not filenames:
+        return result
 
-    cmd = ['kpsewhich']
-    if format is not None:
-        cmd += ['--format=' + format]
-    cmd += [filename]
-    _log.debug('find_tex_file(%s): %s', filename, cmd)
+    cmd = ['kpsewhich'] + list(filenames)
+    _log.debug('find_tex_files: %s', cmd)
     pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE)
-    result = pipe.communicate()[0].rstrip()
-    _log.debug('find_tex_file result: %s', result)
-    return result.decode('ascii')
+    output = pipe.communicate()[0].decode('ascii').splitlines()
+    _log.debug('find_tex_files result: %s', output)
+    mapping = _match(filenames, output)
+    with cache.connection as transaction:
+        cache.update_pathnames(mapping, transaction)
+    result.update(mapping)
+
+    return result
+
+
+def _match(filenames, pathnames):
+    """
+    Match filenames to pathnames in lists that are in matching order,
+    except that some filenames may lack pathnames.
+    """
+    result = {f: None for f in filenames}
+    filenames, pathnames = iter(filenames), iter(pathnames)
+    try:
+        filename, pathname = next(filenames), next(pathnames)
+        while True:
+            if pathname.endswith(os.path.sep + filename):
+                result[filename] = pathname
+                pathname = next(pathnames)
+            filename = next(filenames)
+    except StopIteration:
+        return result
+
+
+def find_tex_file(filename, format=None, cache=None):
+    """
+    Find a file in the texmf tree.
+
+    Calls :program:`kpsewhich` which is an interface to the kpathsea
+    library [1]_. Most existing TeX distributions on Unix-like systems use
+    kpathsea. It is also available as part of MikTeX, a popular
+    distribution on Windows.
+
+    The results are cached into the TeX support database, by default
+    :file:`texsupport.N.db` in the cache directory. In case of mistaken
+    results, deleting this file resets the cache.
+
+    Parameters
+    ----------
+    filename : string or bytestring
+    format : string or bytestring, DEPRECATED
+        Used as the value of the `--format` option to :program:`kpsewhich`.
+        Could be e.g. 'tfm' or 'vf' to limit the search to that type of files.
+        Deprecated to allow batching multiple filenames into one kpsewhich
+        call, since any format option would apply to all filenames at once.
+    cache : TeXSupportCache, optional
+        Cache instance to use, defaults to the singleton instance of the class.
+
+    References
+    ----------
+
+    .. [1] `Kpathsea documentation <http://www.tug.org/kpathsea/>`_
+        The library that :program:`kpsewhich` is part of.
+    """
+
+    if format is not None:
+        cbook.warn_deprecated(
+            "3.0",
+            "The format option to find_tex_file is deprecated "
+            "to allow batching multiple filenames into one call. "
+            "Omitting the option should not change the result, as "
+            "kpsewhich uses the filename extension to choose the path.")
+        # we expect these to always be ascii encoded, but use utf-8
+        # out of caution
+        if isinstance(filename, bytes):
+            filename = filename.decode('utf-8', errors='replace')
+        if isinstance(format, bytes):
+            format = format.decode('utf-8', errors='replace')
+
+        cmd = ['kpsewhich']
+        if format is not None:
+            cmd += ['--format=' + format]
+        cmd += [filename]
+        _log.debug('find_tex_file(%s): %s', filename, cmd)
+        pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+        result = pipe.communicate()[0].rstrip()
+        _log.debug('find_tex_file result: %s', result)
+        return result.decode('ascii')
+
+    return list(find_tex_files([filename], cache).values())[0]
 
 
 # With multiple text objects per figure (e.g., tick labels) we may end
diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py
index 6b005fd34170..4a5b924a2312 100644
--- a/lib/matplotlib/tests/test_dviread.py
+++ b/lib/matplotlib/tests/test_dviread.py
@@ -1,9 +1,16 @@
 from matplotlib.testing.decorators import skip_if_command_unavailable
 
+try:
+    from unittest import mock
+except ImportError:
+    import mock
+
 import matplotlib.dviread as dr
 import os.path
 import json
 import pytest
+import sqlite3
+import warnings
 
 
 def test_PsfontsMap(monkeypatch):
@@ -68,3 +75,80 @@ def test_dviread():
                  'boxes': [[b.x, b.y, b.height, b.width] for b in page.boxes]}
                 for page in dvi]
     assert data == correct
+
+
+def test_TeXSupportCache(tmpdir):
+    dbfile = str(tmpdir / "test.db")
+    cache = dr.TeXSupportCache(filename=dbfile)
+    assert cache.get_pathnames(['foo', 'bar']) == {}
+    with cache.connection as transaction:
+        cache.update_pathnames({'foo': '/tmp/foo',
+                                'xyzzy': '/xyzzy.dat',
+                                'fontfile': None}, transaction)
+    assert cache.get_pathnames(['foo', 'bar']) == {'foo': '/tmp/foo'}
+    assert cache.get_pathnames(['xyzzy', 'fontfile']) == \
+        {'xyzzy': '/xyzzy.dat', 'fontfile': None}
+
+
+def test_TeXSupportCache_versioning(tmpdir):
+    dbfile = str(tmpdir / "test.db")
+    cache1 = dr.TeXSupportCache(dbfile)
+    with cache1.connection as transaction:
+        cache1.update_pathnames({'foo': '/tmp/foo'}, transaction)
+
+    with sqlite3.connect(dbfile, isolation_level="DEFERRED") as conn:
+        conn.executescript('PRAGMA user_version=1000000000;')
+
+    with pytest.raises(dr.TeXSupportCacheError):
+        cache2 = dr.TeXSupportCache(dbfile)
+
+
+def test_find_tex_files(tmpdir):
+    with mock.patch('matplotlib.dviread.subprocess.Popen') as mock_popen:
+        mock_proc = mock.Mock()
+        stdout = '{s}tmp{s}foo.pfb\n{s}tmp{s}bar.map\n'.\
+                 format(s=os.path.sep).encode('ascii')
+        mock_proc.configure_mock(**{'communicate.return_value': (stdout, b'')})
+        mock_popen.return_value = mock_proc
+
+        # first call uses the results from kpsewhich
+        cache = dr.TeXSupportCache(filename=str(tmpdir / "test.db"))
+        assert dr.find_tex_files(
+            ['foo.pfb', 'cmsy10.pfb', 'bar.tmp', 'bar.map'], cache) \
+            == {'foo.pfb': '{s}tmp{s}foo.pfb'.format(s=os.path.sep),
+                'bar.map': '{s}tmp{s}bar.map'.format(s=os.path.sep),
+                'cmsy10.pfb': None, 'bar.tmp': None}
+        assert mock_popen.called
+
+        # second call (subset of the first one) uses only the cache
+        mock_popen.reset_mock()
+        assert dr.find_tex_files(['foo.pfb', 'cmsy10.pfb'], cache) \
+            == {'foo.pfb': '{s}tmp{s}foo.pfb'.format(s=os.path.sep),
+                'cmsy10.pfb': None}
+        assert not mock_popen.called
+
+        # third call (includes more than the first one) uses kpsewhich again
+        mock_popen.reset_mock()
+        stdout = '{s}usr{s}local{s}cmr10.tfm\n'.\
+                 format(s=os.path.sep).encode('ascii')
+        mock_proc.configure_mock(**{'communicate.return_value': (stdout, b'')})
+        mock_popen.return_value = mock_proc
+        assert dr.find_tex_files(['foo.pfb', 'cmr10.tfm'], cache) == \
+            {'foo.pfb': '{s}tmp{s}foo.pfb'.format(s=os.path.sep),
+             'cmr10.tfm': '{s}usr{s}local{s}cmr10.tfm'.format(s=os.path.sep)}
+        assert mock_popen.called
+
+
+def test_find_tex_file_format():
+    with mock.patch('matplotlib.dviread.subprocess.Popen') as mock_popen:
+        mock_proc = mock.Mock()
+        stdout = b'/foo/bar/baz\n'
+        mock_proc.configure_mock(**{'communicate.return_value': (stdout, b'')})
+        mock_popen.return_value = mock_proc
+
+        warnings.filterwarnings(
+            'ignore',
+            'The format option to find_tex_file is deprecated.*',
+            UserWarning)
+        assert dr.find_tex_file('foobar', format='tfm') == '/foo/bar/baz'
+        assert mock_popen.called

From 2124ac8bcfe9a9d0d096cc21da40415301b6f653 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Fri, 16 Feb 2018 18:00:39 +0200
Subject: [PATCH 2/7] Include next_whats_new/* again

---
 doc/users/whats_new.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/users/whats_new.rst b/doc/users/whats_new.rst
index 648daabc7c5b..3c5fdc23af73 100644
--- a/doc/users/whats_new.rst
+++ b/doc/users/whats_new.rst
@@ -14,12 +14,12 @@ revision, see the :ref:`github-stats`.
 ..
    For a release, add a new section after this, then comment out the include
    and toctree below by indenting them. Uncomment them after the release.
-   .. include:: next_whats_new/README.rst
-   .. toctree::
-      :glob:
-      :maxdepth: 1
+.. include:: next_whats_new/README.rst
+.. toctree::
+   :glob:
+   :maxdepth: 1
 
-      next_whats_new/*
+   next_whats_new/*
 
 
 New in Matplotlib 2.2

From 3ce3061a018a622ff9a7b2e113a53c8d00a0a548 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Sun, 18 Feb 2018 14:47:20 +0200
Subject: [PATCH 3/7] Enable some sqlite and pysqlite options

- synchronous=normal (fewer disk writes, still safe in WAL mode)
- foreign key enforcement
- log sql statements at debug level
- use sqlite3.Row (enables accessing columns by name)
---
 lib/matplotlib/dviread.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
index 52a66d276e86..77a930de990f 100644
--- a/lib/matplotlib/dviread.py
+++ b/lib/matplotlib/dviread.py
@@ -1039,8 +1039,17 @@ def __init__(self, filename=None):
 
         self.connection = sqlite3.connect(
                 filename, isolation_level="DEFERRED")
+        if _log.isEnabledFor(logging.DEBUG):
+            def debug_sql(sql):
+                _log.debug(' '.join(sql.splitlines()).strip())
+            self.connection.set_trace_callback(debug_sql)
+        self.connection.row_factory = sqlite3.Row
         with self.connection as conn:
-            conn.execute("PRAGMA journal_mode=WAL;")
+            conn.executescript("""
+                PRAGMA journal_mode=WAL;
+                PRAGMA synchronous=NORMAL;
+                PRAGMA foreign_keys=ON;
+            """)
             version, = conn.execute("PRAGMA user_version;").fetchone()
 
         if version == 0:

From ba47418796e5b2a8a84bbe8799f1ca01b92c1c73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Fri, 12 Jan 2018 20:29:10 +0200
Subject: [PATCH 4/7] Call kpsewhich with more arguments at one time

This should improve performance if there is a significant startup
cost to running kpsewhich, as reported by some users in #4880.
---
 lib/matplotlib/dviread.py                     | 121 ++++++++++++++++--
 .../tests/baseline_images/dviread/broken1.dvi | Bin 0 -> 851 bytes
 .../tests/baseline_images/dviread/broken2.dvi | Bin 0 -> 856 bytes
 .../tests/baseline_images/dviread/broken3.dvi | Bin 0 -> 856 bytes
 .../tests/baseline_images/dviread/broken4.dvi | Bin 0 -> 856 bytes
 .../tests/baseline_images/dviread/broken5.dvi | Bin 0 -> 856 bytes
 .../tests/baseline_images/dviread/virtual.vf  | Bin 0 -> 56 bytes
 .../tests/baseline_images/dviread/virtual.vpl |  23 ++++
 lib/matplotlib/tests/test_dviread.py          |  22 ++++
 9 files changed, 156 insertions(+), 10 deletions(-)
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/broken1.dvi
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/broken2.dvi
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/broken3.dvi
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/broken4.dvi
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/broken5.dvi
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/virtual.vf
 create mode 100644 lib/matplotlib/tests/baseline_images/dviread/virtual.vpl

diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
index 77a930de990f..7d464209d06c 100644
--- a/lib/matplotlib/dviread.py
+++ b/lib/matplotlib/dviread.py
@@ -171,8 +171,7 @@ def wrapper(self, byte):
 class Dvi(object):
     """
     A reader for a dvi ("device-independent") file, as produced by TeX.
-    The current implementation can only iterate through pages in order,
-    and does not even attempt to verify the postamble.
+    The current implementation can only iterate through pages in order.
 
     This class can be used as a context manager to close the underlying
     file upon exit. Pages can be read via iteration. Here is an overly
@@ -180,13 +179,26 @@ class Dvi(object):
 
     >>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
     >>>     for page in dvi:
-    >>>         print(''.join(unichr(t.glyph) for t in page.text))
+    >>>         print(''.join(chr(t.glyph) for t in page.text))
+
+    Parameters
+    ----------
+
+    filename : str
+        dvi file to read
+    dpi : number or None
+        Dots per inch, can be floating-point; this affects the
+        coordinates returned. Use None to get TeX's internal units
+        which are likely only useful for debugging.
+    cache : TeXSupportCache instance, optional
+        Support file cache instance, defaults to the TeXSupportCache
+        singleton.
     """
     # dispatch table
     _dtable = [None] * 256
     _dispatch = partial(_dispatch, _dtable)
 
-    def __init__(self, filename, dpi):
+    def __init__(self, filename, dpi, cache=None):
         """
         Read the data from the file named *filename* and convert
         TeX's internal units to units of *dpi* per inch.
@@ -194,11 +206,20 @@ def __init__(self, filename, dpi):
         Use None to return TeX's internal units.
         """
         _log.debug('Dvi: %s', filename)
+        if cache is None:
+            cache = TeXSupportCache.get_cache()
+        self.cache = cache
         self.file = open(filename, 'rb')
         self.dpi = dpi
         self.fonts = {}
         self.state = _dvistate.pre
         self.baseline = self._get_baseline(filename)
+        self.fontnames = sorted(set(self._read_fonts()))
+        # populate kpsewhich cache with font pathnames
+        find_tex_files([x + suffix for x in self.fontnames
+                        for suffix in ('.tfm', '.vf', '.pfb')],
+                       cache)
+        cache.optimize()
 
     def _get_baseline(self, filename):
         if rcParams['text.latex.preview']:
@@ -206,8 +227,8 @@ def _get_baseline(self, filename):
             baseline_filename = base + ".baseline"
             if os.path.exists(baseline_filename):
                 with open(baseline_filename, 'rb') as fd:
-                    l = fd.read().split()
-                height, depth, width = l
+                    line = fd.read().split()
+                height, depth, width = line
                 return float(depth)
         return None
 
@@ -294,6 +315,61 @@ def _output(self):
         return Page(text=text, boxes=boxes, width=(maxx-minx)*d,
                     height=(maxy_pure-miny)*d, descent=descent)
 
+    def _read_fonts(self):
+        """Read the postamble of the file and return a list of fonts used."""
+
+        file = self.file
+        offset = -1
+        while offset > -100:
+            file.seek(offset, 2)
+            byte = file.read(1)[0]
+            if byte != 223:
+                break
+            offset -= 1
+        if offset >= -4:
+            raise ValueError(
+                "malformed dvi file %s: too few 223 bytes" % file.name)
+        if byte != 2:
+            raise ValueError(
+                ("malformed dvi file %s: post-postamble "
+                 "identification byte not 2") % file.name)
+        file.seek(offset - 4, 2)
+        offset = struct.unpack('!I', file.read(4))[0]
+        file.seek(offset, 0)
+        try:
+            byte = file.read(1)[0]
+        except IndexError:
+            raise ValueError(
+                "malformed dvi file %s: postamble offset %d out of range"
+                % (file.name, offset))
+        if byte != 248:
+            raise ValueError(
+                "malformed dvi file %s: postamble not found at offset %d"
+                % (file.name, offset))
+
+        fonts = []
+        file.seek(28, 1)
+        while True:
+            byte = file.read(1)[0]
+            if 243 <= byte <= 246:
+                _, _, _, _, a, length = (
+                    _arg_olen1(self, byte-243),
+                    _arg(4, False, self, None),
+                    _arg(4, False, self, None),
+                    _arg(4, False, self, None),
+                    _arg(1, False, self, None),
+                    _arg(1, False, self, None))
+                fontname = file.read(a + length)[-length:].decode('ascii')
+                fonts.append(fontname)
+            elif byte == 249:
+                break
+            else:
+                raise ValueError(
+                    "malformed dvi file %s: opcode %d in postamble"
+                    % (file.name, byte))
+        file.seek(0, 0)
+        return fonts
+
     def _read(self):
         """
         Read one page from the file. Return True if successful,
@@ -593,6 +669,10 @@ class Vf(Dvi):
     ----------
 
     filename : string or bytestring
+        vf file to read
+    cache : TeXSupportCache instance, optional
+        Support file cache instance, defaults to the TeXSupportCache
+        singleton.
 
     Notes
     -----
@@ -603,8 +683,8 @@ class Vf(Dvi):
     but replaces the `_read` loop and dispatch mechanism.
     """
 
-    def __init__(self, filename):
-        Dvi.__init__(self, filename, 0)
+    def __init__(self, filename, cache=None):
+        Dvi.__init__(self, filename, dpi=0, cache=cache)
         try:
             self._first_font = None
             self._chars = {}
@@ -615,6 +695,27 @@ def __init__(self, filename):
     def __getitem__(self, code):
         return self._chars[code]
 
+    def _read_fonts(self):
+        """Read through the font-definition section of the vf file
+        and return the list of font names."""
+        fonts = []
+        self.file.seek(0, 0)
+        while True:
+            byte = self.file.read(1)[0]
+            if byte <= 242 or byte >= 248:
+                break
+            elif 243 <= byte <= 246:
+                _ = self._arg(byte - 242)
+                _, _, _, a, length = [self._arg(x) for x in (4, 4, 4, 1, 1)]
+                fontname = self.file.read(a + length)[-length:].decode('ascii')
+                fonts.append(fontname)
+            elif byte == 247:
+                _, k = self._arg(1), self._arg(1)
+                _ = self.file.read(k)
+                _, _ = self._arg(4), self._arg(4)
+        self.file.seek(0, 0)
+        return fonts
+
     def _read(self):
         """
         Read one page from the file. Return True if successful,
@@ -652,8 +753,8 @@ def _read(self):
                 self._init_packet(packet_len)
             elif 243 <= byte <= 246:
                 k = self._arg(byte - 242, byte == 246)
-                c, s, d, a, l = [self._arg(x) for x in (4, 4, 4, 1, 1)]
-                self._fnt_def_real(k, c, s, d, a, l)
+                c, s, d, a, length = [self._arg(x) for x in (4, 4, 4, 1, 1)]
+                self._fnt_def_real(k, c, s, d, a, length)
                 if self._first_font is None:
                     self._first_font = k
             elif byte == 247:       # preamble
diff --git a/lib/matplotlib/tests/baseline_images/dviread/broken1.dvi b/lib/matplotlib/tests/baseline_images/dviread/broken1.dvi
new file mode 100644
index 0000000000000000000000000000000000000000..6e960f435de970a10bc9dd9644db1c94cf0cb0f1
GIT binary patch
literal 851
zcmey)#MnIPfQ&T*5HP=xRtQOrP{=PWDJU&bFfuSS)iX5EGcd6-G%&U32C85LDI)~_
z1Hl5ON(P4B1%DSaFf3rQ2Q!}l$%(!U44>J(KPFT%Z~=`0Vb<i_B13~sAsLy))0jg;
zGm9rDPWI`Y|M#<L@5Dc8X`k6{PF@hizz$Ri!Yn}5<{KUR`X(}*?c5a7JMmwr-Fzl>
z8P~pv%#SM~CYPiZmrPE{pWrT=T$-DjH(|%)lD;K8Ae-$N7}D~KKvsa%Wagz$&P^;S
z$jL9s$xKo&o}5yaS(KWX(zmb|th5E>RmMmLhQ7Y}ia##&_RfEI&jCpO4dCgW#Bkxd
z{bvq;-diyYtRNpie36@Jx>>RhEIFT1S*;UjUn&Dj=&AY4Ul;XGC=dP2+QvLP3a$g>
z6VnYweLxEY6%X_RO+9_me*UNA_RkEzHkJy(!-p+7H?_jhV09EwA>YYAKy_y(DQz_9
z>+1#Sxq8xJ{+DM7s|A4O00rj%v{3<yK7MEQnTvZtk{z-sxtWFrTPuJLyz@1DK1(?t
z#DT&q9?j>}xYpadsW>yQZzAu<V?Z?!!zM9&yqE(vT@WNNiQ)UHfcf8k+}&sdH01Bc
z>`f^^rJ!(~FM0CMdye3O)a1;>9L2uA9#B$Y0%Br-7M`TCDKRg<BqOzGA~4np64O(G
u&gl68G&&SH&4N-I<8+`^91Kj@eT_A)uqH!nsSR7aVUMGqK&NeFx(@(>l@as+

literal 0
HcmV?d00001

diff --git a/lib/matplotlib/tests/baseline_images/dviread/broken2.dvi b/lib/matplotlib/tests/baseline_images/dviread/broken2.dvi
new file mode 100644
index 0000000000000000000000000000000000000000..bd2b7479534633dc526652e7cb7f5bbe3a4bfaf3
GIT binary patch
literal 856
zcmey)#MnIPfQ&T*5HP=xRtQOrP{=PWDJU&bFfuSS)iX5EGcd6-G%&U32C85LDI)~_
z1Hl5ON(P4B1%DSaFf3rQ2Q!}l$%(!U44>J(KPFT%Z~=`0Vb<i_B13~sAsLy))0jg;
zGm9rDPWI`Y|M#<L@5Dc8X`k6{PF@hizz$Ri!Yn}5<{KUR`X(}*?c5a7JMmwr-Fzl>
z8P~pv%#SM~CYPiZmrPE{pWrT=T$-DjH(|%)lD;K8Ae-$N7}D~KKvsa%Wagz$&P^;S
z$jL9s$xKo&o}5yaS(KWX(zmb|th5E>RmMmLhQ7Y}ia##&_RfEI&jCpO4dCgW#Bkxd
z{bvq;-diyYtRNpie36@Jx>>RhEIFT1S*;UjUn&Dj=&AY4Ul;XGC=dP2+QvLP3a$g>
z6VnYweLxEY6%X_RO+9_me*UNA_RkEzHkJy(!-p+7H?_jhV09EwA>YYAKy_y(DQz_9
z>+1#Sxq8xJ{+DM7s|A4O00rj%v{3<yK7MEQnTvZtk{z-sxtWFrTPuJLyz@1DK1(?t
z#DT&q9?j>}xYpadsW>yQZzAu<V?Z?!!zM9&yqE(vT@WNNiQ)UHfcf8k+}&sdH01Bc
z>`f^^rJ!(~FM0CMdye3O)a1;>9L2uA9#B$Y0%Br-7M`TCDKRg<BqOzGA~4np64O(G
v&gl68G&&SH&4N-I<8+`^91Kj@eT_A)uqH!nsSR7aVUMGqK&NeFz7GZf=y4WM

literal 0
HcmV?d00001

diff --git a/lib/matplotlib/tests/baseline_images/dviread/broken3.dvi b/lib/matplotlib/tests/baseline_images/dviread/broken3.dvi
new file mode 100644
index 0000000000000000000000000000000000000000..5c64bcc7d332230aa5af7618ddace2dc82863922
GIT binary patch
literal 856
zcmey)#MnIPfQ&T*5HP=xRtQOrP{=PWDJU&bFfuSS)iX5EGcd6-G%&U32C85LDI)~_
z1Hl5ON(P4B1%DSaFf3rQ2Q!}l$%(!U44>J(KPFT%Z~=`0Vb<i_B13~sAsLy))0jg;
zGm9rDPWI`Y|M#<L@5Dc8X`k6{PF@hizz$Ri!Yn}5<{KUR`X(}*?c5a7JMmwr-Fzl>
z8P~pv%#SM~CYPiZmrPE{pWrT=T$-DjH(|%)lD;K8Ae-$N7}D~KKvsa%Wagz$&P^;S
z$jL9s$xKo&o}5yaS(KWX(zmb|th5E>RmMmLhQ7Y}ia##&_RfEI&jCpO4dCgW#Bkxd
z{bvq;-diyYtRNpie36@Jx>>RhEIFT1S*;UjUn&Dj=&AY4Ul;XGC=dP2+QvLP3a$g>
z6VnYweLxEY6%X_RO+9_me*UNA_RkEzHkJy(!-p+7H?_jhV09EwA>YYAKy_y(DQz_9
z>+1#Sxq8xJ{+DM7s|A4O00rj%v{3<yK7MEQnTvZtk{z-sxtWFrTPuJLyz@1DK1(?t
z#DT&q9?j>}xYpadsW>yQZzAu<V?Z?!!zM9&yqE(vT@WNNiQ)UHfcf8k+}&sdH01Bc
z>`f^^rJ!(~FM0CMdye3O)a1;>9L2uA9#B$Y0%Br-7M`TCDKRg<BqOzGA~4np64O(G
w&gl68G&&SH&4N-I<8+`^91Kj@eT_A)uqH!nsSR7aVUMGq1q@6ZneKxD0PQ*!zW@LL

literal 0
HcmV?d00001

diff --git a/lib/matplotlib/tests/baseline_images/dviread/broken4.dvi b/lib/matplotlib/tests/baseline_images/dviread/broken4.dvi
new file mode 100644
index 0000000000000000000000000000000000000000..79c30e33124472a804c2e69698e8f2d29a49bc4e
GIT binary patch
literal 856
zcmey)#MnIPfQ&T*5HP=xRtQOrP{=PWDJU&bFfuSS)iX5EGcd6-G%&U32C85LDI)~_
z1Hl5ON(P4B1%DSaFf3rQ2Q!}l$%(!U44>J(KPFT%Z~=`0Vb<i_B13~sAsLy))0jg;
zGm9rDPWI`Y|M#<L@5Dc8X`k6{PF@hizz$Ri!Yn}5<{KUR`X(}*?c5a7JMmwr-Fzl>
z8P~pv%#SM~CYPiZmrPE{pWrT=T$-DjH(|%)lD;K8Ae-$N7}D~KKvsa%Wagz$&P^;S
z$jL9s$xKo&o}5yaS(KWX(zmb|th5E>RmMmLhQ7Y}ia##&_RfEI&jCpO4dCgW#Bkxd
z{bvq;-diyYtRNpie36@Jx>>RhEIFT1S*;UjUn&Dj=&AY4Ul;XGC=dP2+QvLP3a$g>
z6VnYweLxEY6%X_RO+9_me*UNA_RkEzHkJy(!-p+7H?_jhV09EwA>YYAKy_y(DQz_9
z>+1#Sxq8xJ{+DM7s|A4O00rj%v{3<yK7MEQnTvZtk{z-sxtWFrTPuJLyz@1DK1(?t
z#DT&q9?j>}xYpadsW>yQZzAu<V?Z?!!zM9&yqE(vT@WNNiQ)UHfcf8k+}&sdH01Bc
z>`f^^rJ!(~FM0CMdye3O)a1;>9L2uA9#B$Y0%Br-7M`TCDKRg<BqOzGA~4np64O(G
u&gl68G&&SH&4N-I<8+`^91Kj@eT_A)uqH!nsSR7aVUMGqApbJm2Lk}&&=s5j

literal 0
HcmV?d00001

diff --git a/lib/matplotlib/tests/baseline_images/dviread/broken5.dvi b/lib/matplotlib/tests/baseline_images/dviread/broken5.dvi
new file mode 100644
index 0000000000000000000000000000000000000000..7d7fdcbd8f024962c0400bcf41ed6a0193544613
GIT binary patch
literal 856
zcmey)#MnIPfQ&T*5HP=xRtQOrP{=PWDJU&bFfuSS)iX5EGcd6-G%&U32C85LDI)~_
z1Hl5ON(P4B1%DSaFf3rQ2Q!}l$%(!U44>J(KPFT%Z~=`0Vb<i_B13~sAsLy))0jg;
zGm9rDPWI`Y|M#<L@5Dc8X`k6{PF@hizz$Ri!Yn}5<{KUR`X(}*?c5a7JMmwr-Fzl>
z8P~pv%#SM~CYPiZmrPE{pWrT=T$-DjH(|%)lD;K8Ae-$N7}D~KKvsa%Wagz$&P^;S
z$jL9s$xKo&o}5yaS(KWX(zmb|th5E>RmMmLhQ7Y}ia##&_RfEI&jCpO4dCgW#Bkxd
z{bvq;-diyYtRNpie36@Jx>>RhEIFT1S*;UjUn&Dj=&AY4Ul;XGC=dP2+QvLP3a$g>
z6VnYweLxEY6%X_RO+9_me*UNA_RkEzHkJy(!-p+7H?_jhV09EwA>YYAKy_y(DQz_9
z>+1#Sxq8xJ{+DM7s|A4O00rj%v{3<yK7MEQnTvZtk{z-sxtWFrTPuJLyz@1DK1(?t
z#DT&q9?j>}xYpadsW>yQZzAu<V?Z?!!zM9&yqE(vT@WNNiQ)UHfcf8k+}&sdH01Bc
z>`f^^rJ!(~FM0CMdye3O)a1;>9L2uA9#B$Y0%Br-7M`TCDKRg<BqOzGA~4np64O(G
w&gl68G&&SH&4N-I<8+`^91Kj@eO-?&uCOLUY^e=fykU={pFpQ=WV#Oq0JGW^+W-In

literal 0
HcmV?d00001

diff --git a/lib/matplotlib/tests/baseline_images/dviread/virtual.vf b/lib/matplotlib/tests/baseline_images/dviread/virtual.vf
new file mode 100644
index 0000000000000000000000000000000000000000..2b64f1df3da61880d8abdddade72a2206d1006e4
GIT binary patch
literal 56
vcmex<ih&Ub85S@wd}aUx0U!zFF|a1*78x3R2Fd{eOpq-(H?_jh;KvUDA14b}

literal 0
HcmV?d00001

diff --git a/lib/matplotlib/tests/baseline_images/dviread/virtual.vpl b/lib/matplotlib/tests/baseline_images/dviread/virtual.vpl
new file mode 100644
index 000000000000..0c051a508b65
--- /dev/null
+++ b/lib/matplotlib/tests/baseline_images/dviread/virtual.vpl
@@ -0,0 +1,23 @@
+(FAMILY TESTING)
+(COMMENT Test data for matplotlib)
+(COMMENT Run vptovf virtual.vpl to obtain virtual.vf)
+(FACE O 352)
+(CODINGSCHEME TEX TEXT)
+(DESIGNSIZE R 10.0)
+(FONTDIMEN
+   (SLANT R 0.0)
+   (SPACE R 0.333334)
+   (STRETCH R 0.166667)
+   (SHRINK R 0.111112)
+   (XHEIGHT R 0.430555)
+   (QUAD R 1.000003)
+   (EXTRASPACE R 0.111112)
+   )
+(MAPFONT D 0
+   (FONTNAME cmr10)
+   (FONTDSIZE R 10.0)
+   )
+(MAPFONT D 1
+   (FONTNAME cmex10)
+   (FONTDSIZE R 10.0)
+   )
diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py
index 4a5b924a2312..6091c106db22 100644
--- a/lib/matplotlib/tests/test_dviread.py
+++ b/lib/matplotlib/tests/test_dviread.py
@@ -77,6 +77,28 @@ def test_dviread():
     assert data == correct
 
 
+@skip_if_command_unavailable(["kpsewhich", "-version"])
+def test_dviread_get_fonts():
+    dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread')
+    with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi:
+        assert dvi.fontnames == \
+            ['cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7']
+    with dr.Vf(os.path.join(dir, 'virtual.vf')) as vf:
+        assert vf.fontnames == ['cmex10', 'cmr10']
+
+
+def test_dviread_get_fonts_error_handling():
+    dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread')
+    for n, message in [(1, "too few 223 bytes"),
+                       (2, "post-postamble identification"),
+                       (3, "postamble offset"),
+                       (4, "postamble not found"),
+                       (5, "opcode 127 in postamble")]:
+        with pytest.raises(ValueError) as e:
+            dr.Dvi(os.path.join(dir, "broken%d.dvi" % n), None)
+        assert message in str(e.value)
+
+
 def test_TeXSupportCache(tmpdir):
     dbfile = str(tmpdir / "test.db")
     cache = dr.TeXSupportCache(filename=dbfile)

From 940925dc18498e7d6478093d308f3d07c1072097 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Fri, 19 Jan 2018 11:08:20 +0200
Subject: [PATCH 5/7] Small improvements to the Vf class

Expose the scale attribute, allow overriding the widths, add
some convenience methods.
---
 lib/matplotlib/dviread.py | 52 +++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
index 7d464209d06c..fb69369fc248 100644
--- a/lib/matplotlib/dviread.py
+++ b/lib/matplotlib/dviread.py
@@ -422,9 +422,9 @@ def _put_char_real(self, char):
             self.text.append(Text(self.h, self.v, font, char,
                                   font._width_of(char)))
         else:
-            scale = font._scale
+            scale = font.scale
             for x, y, f, g, w in font._vf[char].text:
-                newf = DviFont(scale=_mul2012(scale, f._scale),
+                newf = DviFont(scale=_mul2012(scale, f.scale),
                                tfm=f._tfm, texname=f.texname, vf=f._vf)
                 self.text.append(Text(self.h + _mul2012(x, scale),
                                       self.v + _mul2012(y, scale),
@@ -580,16 +580,19 @@ class DviFont(object):
     ----------
 
     scale : float
-        Factor by which the font is scaled from its natural size.
-    tfm : Tfm
-        TeX font metrics for this font
+       Factor by which the font is scaled from its natural size,
+       represented as an integer in 20.12 fixed-point format.
+    tfm : Tfm, may be None if widths given
+       TeX Font Metrics file for this font
     texname : bytes
        Name of the font as used internally by TeX and friends, as an
        ASCII bytestring. This is usually very different from any external
        font names, and :class:`dviread.PsfontsMap` can be used to find
        the external name of the font.
-    vf : Vf
+    vf : Vf or None
        A TeX "virtual font" file, or None if this font is not virtual.
+    widths : list of integers, optional
+       Widths for this font. Overrides the widths read from the tfm file.
 
     Attributes
     ----------
@@ -598,26 +601,37 @@ class DviFont(object):
     size : float
        Size of the font in Adobe points, converted from the slightly
        smaller TeX points.
+    scale : int
+       Factor by which the font is scaled from its natural size,
+       represented as an integer in 20.12 fixed-point format.
     widths : list
        Widths of glyphs in glyph-space units, typically 1/1000ths of
        the point size.
 
     """
-    __slots__ = ('texname', 'size', 'widths', '_scale', '_vf', '_tfm')
+    __slots__ = ('texname', 'size', 'widths', 'scale', '_vf', '_tfm')
 
-    def __init__(self, scale, tfm, texname, vf):
+    def __init__(self, scale, tfm, texname, vf, widths=None):
         if not isinstance(texname, bytes):
             raise ValueError("texname must be a bytestring, got %s"
                              % type(texname))
-        self._scale, self._tfm, self.texname, self._vf = \
-            scale, tfm, texname, vf
+        self.scale, self._tfm, self.texname, self._vf, self.widths = \
+            scale, tfm, texname, vf, widths
         self.size = scale * (72.0 / (72.27 * 2**16))
-        try:
-            nchars = max(tfm.width) + 1
-        except ValueError:
-            nchars = 0
-        self.widths = [(1000*tfm.width.get(char, 0)) >> 20
-                       for char in range(nchars)]
+
+        if self.widths is None:
+            try:
+                nchars = max(tfm.width) + 1
+            except ValueError:
+                nchars = 0
+            self.widths = [(1000*tfm.width.get(char, 0)) >> 20
+                           for char in range(nchars)]
+
+    def __repr__(self):
+        return '<DviFont %s *%f>' % (self.texname, self.scale / 2**20)
+
+    def __hash__(self):
+        return 1001 * hash(self.texname) + hash(self.size)
 
     def __eq__(self, other):
         return self.__class__ == other.__class__ and \
@@ -633,7 +647,7 @@ def _width_of(self, char):
 
         width = self._tfm.width.get(char, None)
         if width is not None:
-            return _mul2012(width, self._scale)
+            return _mul2012(width, self.scale)
         _log.debug('No width for char %d in font %s.', char, self.texname)
         return 0
 
@@ -651,7 +665,7 @@ def _height_depth_of(self, char):
                            name, char, self.texname)
                 result.append(0)
             else:
-                result.append(_mul2012(value, self._scale))
+                result.append(_mul2012(value, self.scale))
         return result
 
 
@@ -1374,7 +1388,7 @@ def _fontfile(cls, suffix, texname):
             fPrev = None
             for x, y, f, c, w in page.text:
                 if f != fPrev:
-                    print('font', f.texname, 'scaled', f._scale/pow(2.0, 20))
+                    print('font', f.texname, 'scaled', f.scale/pow(2.0, 20))
                     fPrev = f
                 print(x, y, c, 32 <= c < 128 and chr(c) or '.', w)
             for x, y, w, h in page.boxes:

From 545b4077f46a9fabb283e9e202dd4bed956695a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Fri, 19 Jan 2018 11:39:25 +0200
Subject: [PATCH 6/7] Add a way to represent dvi files in the cache

Along with methods to add and query the tables.
---
 lib/matplotlib/dviread.py            | 390 ++++++++++++++++++++++++++-
 lib/matplotlib/tests/test_dviread.py |  17 ++
 2 files changed, 402 insertions(+), 5 deletions(-)

diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
index fb69369fc248..cfd0afe647a0 100644
--- a/lib/matplotlib/dviread.py
+++ b/lib/matplotlib/dviread.py
@@ -27,8 +27,8 @@
 import sqlite3
 import sys
 import textwrap
-
 import numpy as np
+import zlib
 
 from matplotlib import cbook, get_cachedir, rcParams
 from matplotlib.compat import subprocess
@@ -1102,13 +1102,16 @@ class TeXSupportCacheError(Exception):
 
 class TeXSupportCache:
     """A persistent cache of data related to support files related to dvi
-    files produced by TeX. Currently holds results from :program:`kpsewhich`,
-    in future versions could hold pre-parsed font data etc.
+    files produced by TeX. Currently holds results from :program:`kpsewhich`
+    and the contents of parsed dvi files, in future versions could include
+    pre-parsed font data etc.
 
     Usage::
 
       # create or get the singleton instance
       cache = TeXSupportCache.get_cache()
+
+      # insert and query some pathnames
       with cache.connection as transaction:
           cache.update_pathnames(
               {"pdftex.map": "/usr/local/pdftex.map",
@@ -1120,6 +1123,25 @@ class TeXSupportCache:
       # optional after inserting new data, may improve query performance:
       cache.optimize()
 
+      # insert and query some dvi file contents
+      with cache.connection as transaction:
+          id = cache.dvi_new_file("/path/to/foobar.dvi", transaction)
+          font_ids = cache.dvi_font_sync_ids(['font1', 'font2'], transaction)
+          cache.dvi_font_sync_metrics(DviFont1, transaction)
+          cache.dvi_font_sync_metrics(DviFont2, transaction)
+          for i, box in enumerate(boxes):
+               cache.dvi_add_box(box, id, 0, i, transaction)
+          for i, text in enumerate(texts):
+               cache.dvi_add_text(text, id, 0, i, font_ids['font1'],
+                                  transaction)
+      fonts = cache.dvi_fonts(id)
+      assert cache.dvi_page_exists(id, 0)
+      bbox = cache.dvi_page_boundingbox(id, 0)
+      for box in cache.dvi_page_boxes(id, 0):
+          handle_box(box)
+      for text in cache.dvi_page_text(id, 0):
+          handle_text(text)
+
     Parameters
     ----------
 
@@ -1137,7 +1159,7 @@ class TeXSupportCache:
     """
 
     __slots__ = ('connection')
-    schema_version = 1  # should match PRAGMA user_version in _create
+    schema_version = 2  # should match PRAGMA user_version in _create
     instance = None
 
     @classmethod
@@ -1177,6 +1199,7 @@ def debug_sql(sql):
     def _create(self):
         """Create the database."""
         with self.connection as conn:
+            # kpsewhich results
             conn.executescript(
                 """
                 PRAGMA page_size=4096;
@@ -1184,7 +1207,50 @@ def _create(self):
                     filename TEXT PRIMARY KEY NOT NULL,
                     pathname TEXT
                 ) WITHOUT ROWID;
-                PRAGMA user_version=1;
+                """)
+            # dvi files
+            conn.executescript(
+                """
+                CREATE TABLE dvi_file(
+                    id INTEGER PRIMARY KEY,
+                    name UNIQUE NOT NULL,
+                    mtime INTEGER,
+                    size INTEGER
+                );
+                CREATE TABLE dvi_font(
+                    id INTEGER PRIMARY KEY,
+                    texname UNIQUE NOT NULL
+                );
+                CREATE TABLE dvi_font_metrics(
+                    id INTEGER NOT NULL
+                        REFERENCES dvi_font(id) ON DELETE CASCADE,
+                    scale INTEGER NOT NULL,
+                    widths BLOB NOT NULL,
+                    PRIMARY KEY (id, scale)
+                );
+                CREATE TABLE dvi(
+                    fileid INTEGER NOT NULL
+                        REFERENCES dvi_file(id) ON DELETE CASCADE,
+                    pageno INTEGER NOT NULL,
+                    seq INTEGER NOT NULL,
+                    x INTEGER NOT NULL,
+                    y INTEGER NOT NULL,
+                    height INTEGER NOT NULL,
+                    width INTEGER NOT NULL,
+                    depth INTEGER NOT NULL,
+                    fontid INTEGER,
+                    fontscale INTEGER,
+                    glyph INTEGER,
+                    PRIMARY KEY (fileid, pageno, seq)
+                ) WITHOUT ROWID;
+                CREATE TABLE dvi_baseline(
+                    fileid INTEGER NOT NULL
+                        REFERENCES dvi_file(id) ON DELETE CASCADE,
+                    pageno INTEGER NOT NULL,
+                    baseline REAL NOT NULL,
+                    PRIMARY KEY (fileid, pageno)
+                ) WITHOUT ROWID;
+                PRAGMA user_version=2;
                 """)
 
     def optimize(self):
@@ -1231,6 +1297,320 @@ def update_pathnames(self, mapping, transaction):
             "VALUES (?, ?)",
             mapping.items())
 
+    # Dvi files
+
+    def dvi_new_file(self, name, transaction):
+        """Record a dvi file in the cache.
+
+        Parameters
+        ----------
+        name : str
+            Name of the file to add.
+        transaction : obtained via the context manager of self.connection
+        """
+
+        stat = os.stat(name)
+        transaction.execute("DELETE FROM dvi_file WHERE name=?", (name,))
+        transaction.execute(
+            "INSERT INTO dvi_file (name, mtime, size) VALUES (?, ?, ?)",
+            (name, int(stat.st_mtime), int(stat.st_size)))
+        return transaction.execute("SELECT last_insert_rowid()").fetchone()[0]
+
+    def dvi_id(self, name):
+        """Query the database identifier of a given dvi file.
+
+        Parameters
+        ----------
+        name : str
+            Name of the file to query.
+
+        Returns
+        -------
+        int or None
+        """
+
+        rows = self.connection.execute(
+            "SELECT id, mtime, size FROM dvi_file WHERE name=? LIMIT 1",
+            (name,)).fetchall()
+        if rows:
+            id, mtime, size = rows[0]
+            stat = os.stat(name)
+            if mtime == int(stat.st_mtime) and size == stat.st_size:
+                return id
+
+    def dvi_font_sync_ids(self, fontnames, transaction):
+        """Record dvi fonts in the cache and return their database
+        identifiers.
+
+        Parameters
+        ----------
+        fontnames : list of str
+            TeX names of fonts
+        transaction : obtained via the context manager of self.connection
+
+        Returns
+        -------
+        mapping from texname to int
+        """
+
+        transaction.executemany(
+            "INSERT OR IGNORE INTO dvi_font (texname) VALUES (?)",
+            ((name,) for name in fontnames))
+        fontid = {}
+        for name in fontnames:
+            fontid[name], = transaction.execute(
+                "SELECT id FROM dvi_font WHERE texname=?",
+                (name,)).fetchone()
+        return fontid
+
+    def dvi_font_sync_metrics(self, dvifont, transaction):
+        """Record dvi font metrics in the cache.
+
+        Parameters
+        ----------
+        dvifont : DviFont
+        transaction : obtained via the context manager of self.connection
+        """
+
+        exists = bool(transaction.execute("""
+            SELECT 1 FROM dvi_font_metrics m, dvi_font f
+            WHERE m.id=f.id AND f.texname=:texname
+            AND m.scale=:scale LIMIT 1
+        """, {
+            "texname": dvifont.texname.decode('ascii'),
+            "scale": dvifont.scale
+        }).fetchall())
+
+        if not exists:
+            # Widths are given in 32-bit words in tfm, although the normal
+            # range is around 1000 units. This and the repetition of values
+            # make the width data very compressible.
+            widths = struct.pack('<{}I'.format(len(dvifont.widths)),
+                                 *dvifont.widths)
+            widths = zlib.compress(widths, 9)
+            transaction.execute("""
+                INSERT INTO dvi_font_metrics (id, scale, widths)
+                SELECT id, :scale, :widths FROM dvi_font WHERE texname=:texname
+            """, {
+                "texname": dvifont.texname.decode('ascii'),
+                "scale": dvifont.scale,
+                "widths": widths
+            })
+
+    def dvi_fonts(self, fileid):
+        """Query the dvi fonts of a given dvi file.
+
+        Parameters
+        ----------
+        fileid : int
+            File identifier as returned by dvi_id
+
+        Returns
+        -------
+        mapping from (str, int) to DviFont
+            Maps from (TeX name, scale) to DviFont objects.
+        """
+
+        rows = self.connection.execute("""
+            SELECT texname, fontscale, widths FROM
+            (SELECT DISTINCT fontid, fontscale FROM dvi WHERE fileid=?) d
+            JOIN dvi_font f ON (d.fontid=f.id)
+            JOIN dvi_font_metrics m ON (d.fontid=m.id AND d.fontscale=m.scale)
+        """, (fileid,)).fetchall()
+
+        def decode(widths):
+            data = zlib.decompress(widths)
+            n = len(data) // 4
+            return struct.unpack('<{}I'.format(n), data)
+
+        return {(row['texname'], row['fontscale']):
+                DviFont(texname=row['texname'].encode('ascii'),
+                        scale=row['fontscale'],
+                        widths=decode(row['widths']),
+                        tfm=None, vf=None)
+                for row in rows}
+
+    def dvi_add_box(self, box, fileid, pageno, seq, transaction):
+        """Record a box object of a dvi file.
+
+        Parameters
+        ----------
+        box : Box
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+        seq : int
+            Used to order the boxes
+        transaction : obtained via the context manager of self.connection
+        """
+
+        transaction.execute("""
+            INSERT INTO dvi (
+                fileid, pageno, seq, x, y, height, width, depth
+            ) VALUES (:fileid, :pageno, :seq, :x, :y, :height, :width, 0)
+        """, {
+            "fileid": fileid, "pageno": pageno, "seq": seq,
+            "x": box.x, "y": box.y, "height": box.height, "width": box.width
+        })
+
+    def dvi_add_text(self, text, fileid, pageno, seq, fontid, transaction):
+        """Record a text object of a dvi file.
+
+        Parameters
+        ----------
+        text : Text
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+        seq : int
+            Used to order the text objects
+        fontid : int
+            As returned by dvi_font_sync_ids
+        transaction : obtained via the context manager of self.connection
+        """
+
+        height, depth = text.font._height_depth_of(text.glyph)
+        transaction.execute("""
+            INSERT INTO dvi (
+                fileid, pageno, seq,
+                x, y, height, width, depth, fontid, fontscale, glyph
+            ) VALUES (
+                :fileid, :pageno, :seq,
+                :x, :y, :height, :width, :depth, :fontid, :fontscale, :glyph
+            )
+        """, {
+            "fileid": fileid, "pageno": pageno, "seq": seq,
+            "x": text.x, "y": text.y, "width": text.width,
+            "height": height, "depth": depth,
+            "fontid": fontid, "fontscale": text.font.scale, "glyph": text.glyph
+        })
+
+    def dvi_page_exists(self, fileid, pageno):
+        """Query if a page exists in the dvi file.
+
+        Parameters
+        ----------
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+
+        Returns
+        -------
+        boolean
+        """
+        return bool(self.connection.execute(
+            "SELECT 1 FROM dvi WHERE fileid=? AND pageno=? LIMIT 1",
+            (fileid, pageno)).fetchall())
+
+    def dvi_page_boundingbox(self, fileid, pageno):
+        """Query the bounding box of a page
+
+        Parameters
+        ----------
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+
+        Returns
+        -------
+        A namedtuple-like object with fields min_x, min_y, max_x,
+        max_y and max_y_pure (like max_y but ignores depth).
+        """
+
+        return self.connection.execute("""
+                SELECT min(x)          min_x,
+                       min(y - height) min_y,
+                       max(x + width)  max_x,
+                       max(y + depth)  max_y,
+                       max(y)          max_y_pure
+                FROM dvi WHERE fileid=? AND pageno=?
+                """, (fileid, pageno)).fetchone()
+
+    def dvi_page_boxes(self, fileid, pageno):
+        """Query the boxes of a page
+
+        Parameters
+        ----------
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+
+        Returns
+        -------
+        A list of (x, y, height, width) tuples of boxes
+        """
+
+        return self.connection.execute("""
+            SELECT x, y, height, width FROM dvi
+            WHERE fileid=? AND pageno=? AND fontid IS NULL ORDER BY seq
+        """, (fileid, pageno)).fetchall()
+
+    def dvi_page_text(self, fileid, pageno):
+        """Query the text of a page
+
+        Parameters
+        ----------
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+
+        Returns
+        -------
+        A list of (x, y, height, width, depth, texname, fontscale, glyph)
+        tuples of text
+        """
+
+        return self.connection.execute("""
+            SELECT x, y, height, width, depth, f.texname, fontscale, glyph
+            FROM dvi JOIN dvi_font f ON (dvi.fontid=f.id)
+            WHERE fileid=? AND pageno=? AND fontid IS NOT NULL ORDER BY seq
+        """, (fileid, pageno)).fetchall()
+
+    def dvi_add_baseline(self, fileid, pageno, baseline, transaction):
+        """Record the baseline of a dvi page
+
+        Parameters
+        ----------
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+        baseline : float
+        transaction : obtained via the context manager of self.connection
+        """
+
+        transaction.execute("""
+            INSERT INTO dvi_baseline (fileid, pageno, baseline)
+            VALUES (:fileid, :pageno, :baseline)
+        """, {"fileid": fileid, "pageno": pageno, "baseline": baseline})
+
+    def dvi_get_baseline(self, fileid, pageno):
+        """Query the baseline of a dvi page
+
+        Parameters
+        ----------
+        fileid : int
+            As returned by dvi_id
+        pageno : int
+            Page number
+
+        Returns
+        -------
+        float
+        """
+
+        rows = self.connection.execute(
+            "SELECT baseline FROM dvi_baseline WHERE fileid=? AND pageno=?",
+            (fileid, pageno)).fetchall()
+        if rows:
+            return rows[0][0]
+
 
 def find_tex_files(filenames, cache=None):
     """Find multiple files in the texmf tree. This can be more efficient
diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py
index 6091c106db22..3d61a4140eb3 100644
--- a/lib/matplotlib/tests/test_dviread.py
+++ b/lib/matplotlib/tests/test_dviread.py
@@ -111,6 +111,23 @@ def test_TeXSupportCache(tmpdir):
     assert cache.get_pathnames(['xyzzy', 'fontfile']) == \
         {'xyzzy': '/xyzzy.dat', 'fontfile': None}
 
+    # check that modifying a dvi file invalidates the cache
+    filename = str(tmpdir / "file.dvi")
+    with open(filename, "wb") as f:
+        f.write(b'qwerty')
+    os.utime(filename, (0, 0))
+    with cache.connection as t:
+        id1 = cache.dvi_new_file(filename, t)
+    assert cache.dvi_id(filename) == id1
+
+    with open(filename, "wb") as f:
+        f.write(b'asfdg')
+    os.utime(filename, (0, 0))
+    assert cache.dvi_id(filename) is None
+    with cache.connection as t:
+        id2 = cache.dvi_new_file(filename, t)
+    assert cache.dvi_id(filename) == id2
+
 
 def test_TeXSupportCache_versioning(tmpdir):
     dbfile = str(tmpdir / "test.db")

From 2418413fcef3619ec1ce300cb2bad31d586c5285 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Fri, 19 Jan 2018 11:44:10 +0200
Subject: [PATCH 7/7] Implement reading dvi files into the cache

Rename the Dvi class to _DviReader and use it only for storing
the files into the cache. The new Dvi class reads from the cache,
after calling _DviReader to insert the file into it.
---
 lib/matplotlib/dviread.py            | 253 +++++++++++++++------------
 lib/matplotlib/tests/test_dviread.py |  10 +-
 2 files changed, 144 insertions(+), 119 deletions(-)

diff --git a/lib/matplotlib/dviread.py b/lib/matplotlib/dviread.py
index cfd0afe647a0..d40d9ac244a3 100644
--- a/lib/matplotlib/dviread.py
+++ b/lib/matplotlib/dviread.py
@@ -20,6 +20,7 @@
 from collections import namedtuple
 import enum
 from functools import lru_cache, partial, wraps
+from itertools import chain
 import logging
 import os
 import re
@@ -168,28 +169,23 @@ def wrapper(self, byte):
     return decorate
 
 
-class Dvi(object):
-    """
-    A reader for a dvi ("device-independent") file, as produced by TeX.
-    The current implementation can only iterate through pages in order.
+def _keep(func, keys):
+    """Return a mapping from each k in keys to func(k),
+    omitting every k for which func(k) is None"""
+    return dict((k, v) for k, v in zip(keys, map(func, keys)) if v is not None)
 
-    This class can be used as a context manager to close the underlying
-    file upon exit. Pages can be read via iteration. Here is an overly
-    simple way to extract text without trying to detect whitespace::
 
-    >>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
-    >>>     for page in dvi:
-    >>>         print(''.join(chr(t.glyph) for t in page.text))
+class _DviReader(object):
+    """
+    A reader for a dvi ("device-independent") file, as produced by TeX.
+    This implementation is only used to store the file in a cache, from
+    which it is read by Dvi.
 
     Parameters
     ----------
 
     filename : str
         dvi file to read
-    dpi : number or None
-        Dots per inch, can be floating-point; this affects the
-        coordinates returned. Use None to get TeX's internal units
-        which are likely only useful for debugging.
     cache : TeXSupportCache instance, optional
         Support file cache instance, defaults to the TeXSupportCache
         singleton.
@@ -198,28 +194,28 @@ class Dvi(object):
     _dtable = [None] * 256
     _dispatch = partial(_dispatch, _dtable)
 
-    def __init__(self, filename, dpi, cache=None):
-        """
-        Read the data from the file named *filename* and convert
-        TeX's internal units to units of *dpi* per inch.
-        *dpi* only sets the units and does not limit the resolution.
-        Use None to return TeX's internal units.
-        """
+    def __init__(self, filename, cache=None):
         _log.debug('Dvi: %s', filename)
         if cache is None:
             cache = TeXSupportCache.get_cache()
         self.cache = cache
         self.file = open(filename, 'rb')
-        self.dpi = dpi
         self.fonts = {}
+        self.recursive_fonts = set()
         self.state = _dvistate.pre
         self.baseline = self._get_baseline(filename)
-        self.fontnames = sorted(set(self._read_fonts()))
+        self.fontnames = set(self._read_fonts())
         # populate kpsewhich cache with font pathnames
         find_tex_files([x + suffix for x in self.fontnames
                         for suffix in ('.tfm', '.vf', '.pfb')],
                        cache)
-        cache.optimize()
+        self._tfm = _keep(_tfmfile, self.fontnames)
+        self._vf = _keep(_vffile, self.fontnames)
+        for vf in self._vf.values():
+            self.fontnames.update(vf.fontnames)
+
+    def close(self):
+        self.file.close()
 
     def _get_baseline(self, filename):
         if rcParams['text.latex.preview']:
@@ -232,88 +228,32 @@ def _get_baseline(self, filename):
                 return float(depth)
         return None
 
-    def __enter__(self):
-        """
-        Context manager enter method, does nothing.
-        """
-        return self
-
-    def __exit__(self, etype, evalue, etrace):
-        """
-        Context manager exit method, closes the underlying file if it is open.
-        """
-        self.close()
-
-    def __iter__(self):
-        """
-        Iterate through the pages of the file.
-
-        Yields
-        ------
-        Page
-            Details of all the text and box objects on the page.
-            The Page tuple contains lists of Text and Box tuples and
-            the page dimensions, and the Text and Box tuples contain
-            coordinates transformed into a standard Cartesian
-            coordinate system at the dpi value given when initializing.
-            The coordinates are floating point numbers, but otherwise
-            precision is not lost and coordinate values are not clipped to
-            integers.
-        """
-        while True:
-            have_page = self._read()
-            if have_page:
-                yield self._output()
-            else:
-                break
-
-    def close(self):
-        """
-        Close the underlying file if it is open.
-        """
-        if not self.file.closed:
-            self.file.close()
-
-    def _output(self):
-        """
-        Output the text and boxes belonging to the most recent page.
-        page = dvi._output()
-        """
-        minx, miny, maxx, maxy = np.inf, np.inf, -np.inf, -np.inf
-        maxy_pure = -np.inf
-        for elt in self.text + self.boxes:
-            if isinstance(elt, Box):
-                x, y, h, w = elt
-                e = 0           # zero depth
-            else:               # glyph
-                x, y, font, g, w = elt
-                h, e = font._height_depth_of(g)
-            minx = min(minx, x)
-            miny = min(miny, y - h)
-            maxx = max(maxx, x + w)
-            maxy = max(maxy, y + e)
-            maxy_pure = max(maxy_pure, y)
-
-        if self.dpi is None:
-            # special case for ease of debugging: output raw dvi coordinates
-            return Page(text=self.text, boxes=self.boxes,
-                        width=maxx-minx, height=maxy_pure-miny,
-                        descent=maxy-maxy_pure)
-
-        # convert from TeX's "scaled points" to dpi units
-        d = self.dpi / (72.27 * 2**16)
-        if self.baseline is None:
-            descent = (maxy - maxy_pure) * d
-        else:
-            descent = self.baseline
-
-        text = [Text((x-minx)*d, (maxy-y)*d - descent, f, g, w*d)
-                for (x, y, f, g, w) in self.text]
-        boxes = [Box((x-minx)*d, (maxy-y)*d - descent, h*d, w*d)
-                 for (x, y, h, w) in self.boxes]
-
-        return Page(text=text, boxes=boxes, width=(maxx-minx)*d,
-                    height=(maxy_pure-miny)*d, descent=descent)
+    def store(self):
+        c = self.cache
+        with c.connection as t:
+            fileid = c.dvi_new_file(self.file.name, t)
+            _log.debug('fontnames is %s', self.fontnames)
+            fontid = c.dvi_font_sync_ids(self.fontnames, t)
+
+            pageno = 0
+            while True:
+                if not self._read():
+                    break
+                for seq, elt in enumerate(self.text + self.boxes):
+                    if isinstance(elt, Box):
+                        c.dvi_add_box(elt, fileid, pageno, seq, t)
+                    else:
+                        texname = elt.font.texname.decode('ascii')
+                        c.dvi_add_text(elt, fileid, pageno, seq,
+                                       fontid[texname], t)
+                pageno += 1
+
+            for dvifont in chain(self.recursive_fonts, self.fonts.values()):
+                c.dvi_font_sync_metrics(dvifont, t)
+            if self.baseline is not None:
+                c.dvi_add_baseline(fileid, 0, self.baseline, t)
+        c.optimize()
+        return fileid
 
     def _read_fonts(self):
         """Read the postamble of the file and return a list of fonts used."""
@@ -360,6 +300,8 @@ def _read_fonts(self):
                     _arg(1, False, self, None),
                     _arg(1, False, self, None))
                 fontname = file.read(a + length)[-length:].decode('ascii')
+                _log.debug('dvi._read_fonts(%s): encountered %s',
+                           self.file.name, fontname)
                 fonts.append(fontname)
             elif byte == 249:
                 break
@@ -426,6 +368,7 @@ def _put_char_real(self, char):
             for x, y, f, g, w in font._vf[char].text:
                 newf = DviFont(scale=_mul2012(scale, f.scale),
                                tfm=f._tfm, texname=f.texname, vf=f._vf)
+                self.recursive_fonts.add(newf)
                 self.text.append(Text(self.h + _mul2012(x, scale),
                                       self.v + _mul2012(y, scale),
                                       newf, g, newf._width_of(g)))
@@ -522,14 +465,12 @@ def _fnt_def(self, k, c, s, d, a, l):
     def _fnt_def_real(self, k, c, s, d, a, l):
         n = self.file.read(a + l)
         fontname = n[-l:].decode('ascii')
-        tfm = _tfmfile(fontname)
+        tfm = self._tfm.get(fontname)  # presumably None if unknown; see below
         if tfm is None:
             raise FileNotFoundError("missing font metrics file: %s" % fontname)
         if c != 0 and tfm.checksum != 0 and c != tfm.checksum:
             raise ValueError('tfm checksum mismatch: %s' % n)
-
-        vf = _vffile(fontname)
-
+        vf = self._vf.get(fontname)  # presumably None when there is no vf
         self.fonts[k] = DviFont(scale=s, tfm=tfm, texname=n, vf=vf)
 
     @_dispatch(247, state=_dvistate.pre, args=('u1', 'u4', 'u4', 'u4', 'u1'))
@@ -669,7 +610,89 @@ def _height_depth_of(self, char):
         return result
 
 
-class Vf(Dvi):
+class Dvi(object):
+    """
+    A representation of a dvi ("device-independent") file, as produced by TeX.
+
+    A file the cache has no id for is parsed by `_DviReader` and stored in
+    the cache; pages are then assembled from the cached data.
+
+    Parameters
+    ----------
+    filename : str
+    dpi : float or None
+        If None, pages are returned in raw dvi coordinates.
+    cache : TeXSupportCache, optional
+        Defaults to ``TeXSupportCache.get_cache()``.
+
+    Attributes
+    ----------
+    filename, dpi, cache
+        Same as the parameters, with the default *cache* filled in.
+    """
+    def __init__(self, filename, dpi, cache=None):
+        self.cache = TeXSupportCache.get_cache() if cache is None else cache
+        self.filename = filename
+        self.dpi = dpi
+        file_id = self.cache.dvi_id(filename)
+        if file_id is None:
+            # No id in the cache: parse the file and store its contents.
+            file_id = _DviReader(filename, self.cache).store()
+        self._filename_id = file_id
+        self._fonts = self.cache.dvi_fonts(file_id)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, etype, evalue, etrace):
+        return None  # exceptions are not suppressed
+
+    def __getitem__(self, pageno):
+        if not self.cache.dvi_page_exists(self._filename_id, pageno):
+            raise IndexError
+        return self._output(pageno)
+
+    def _output(self, page):
+        """Assemble the `Page` for page number *page* from the cache."""
+        fid = self._filename_id
+        bbox = self.cache.dvi_page_boundingbox(fid, page)
+        min_x, min_y, max_x = bbox['min_x'], bbox['min_y'], bbox['max_x']
+        max_y, max_y_pure = bbox['max_y'], bbox['max_y_pure']
+        boxes = self.cache.dvi_page_boxes(fid, page)
+        text = self.cache.dvi_page_text(fid, page)
+        baseline = self.cache.dvi_get_baseline(fid, page)
+
+        def font_of(row):
+            return self._fonts[(row['texname'], row['fontscale'])]
+
+        if self.dpi is None:
+            # Special case: hand back raw dvi coordinates, unscaled.
+            glyphs = [Text(x=row['x'], y=row['y'], font=font_of(row),
+                           glyph=row['glyph'], width=row['width'])
+                      for row in text]
+            rules = [Box(x=row['x'], y=row['y'],
+                         height=row['height'], width=row['width'])
+                     for row in boxes]
+            return Page(text=glyphs, boxes=rules, width=max_x-min_x,
+                        height=max_y_pure-min_y, descent=max_y-max_y_pure)
+
+        # Convert from TeX's "scaled points" to dpi units.
+        d = self.dpi / (72.27 * 2**16)
+        descent = (baseline if baseline is not None
+                   else (max_y - max_y_pure) * d)
+        glyphs = [Text((row['x'] - min_x) * d,
+                       (max_y - row['y']) * d - descent,
+                       font_of(row), row['glyph'], row['width'] * d)
+                  for row in text]
+        rules = [Box((row['x'] - min_x) * d,
+                     (max_y - row['y']) * d - descent,
+                     row['height'] * d, row['width'] * d)
+                 for row in boxes]
+        return Page(text=glyphs, boxes=rules, width=(max_x - min_x) * d,
+                    height=(max_y_pure - min_y) * d, descent=descent)
+
+
+class Vf(_DviReader):
     """
     A virtual font (\\*.vf file) containing subroutines for dvi files.
 
@@ -693,12 +716,12 @@ class Vf(Dvi):
 
     The virtual font format is a derivative of dvi:
     http://mirrors.ctan.org/info/knuth/virtual-fonts
-    This class reuses some of the machinery of `Dvi`
+    This class reuses some of the machinery of `_DviReader`
     but replaces the `_read` loop and dispatch mechanism.
     """
 
     def __init__(self, filename, cache=None):
-        Dvi.__init__(self, filename, dpi=0, cache=cache)
+        _DviReader.__init__(self, filename, cache=cache)
         try:
             self._first_font = None
             self._chars = {}
@@ -723,6 +746,8 @@ def _read_fonts(self):
                 _, _, _, a, length = [self._arg(x) for x in (4, 4, 4, 1, 1)]
                 fontname = self.file.read(a + length)[-length:].decode('ascii')
                 fonts.append(fontname)
+                _log.debug('Vf._read_fonts(%s): encountered %s',
+                           self.file.name, fontname)
             elif byte == 247:
                 _, k = self._arg(1), self._arg(1)
                 _ = self.file.read(k)
@@ -752,7 +777,7 @@ def _read(self):
                     if byte in (139, 140) or byte >= 243:
                         raise ValueError(
                             "Inappropriate opcode %d in vf file" % byte)
-                    Dvi._dtable[byte](self, byte)
+                    _DviReader._dtable[byte](self, byte)
                     continue
 
             # We are outside a packet
diff --git a/lib/matplotlib/tests/test_dviread.py b/lib/matplotlib/tests/test_dviread.py
index 3d61a4140eb3..e3986ebb1c5f 100644
--- a/lib/matplotlib/tests/test_dviread.py
+++ b/lib/matplotlib/tests/test_dviread.py
@@ -80,11 +80,11 @@ def test_dviread():
 @skip_if_command_unavailable(["kpsewhich", "-version"])
 def test_dviread_get_fonts():
     dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread')
-    with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi:
-        assert dvi.fontnames == \
-            ['cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7']
-    with dr.Vf(os.path.join(dir, 'virtual.vf')) as vf:
-        assert vf.fontnames == ['cmex10', 'cmr10']
+    dvi = dr._DviReader(os.path.join(dir, 'test.dvi'), None)
+    assert dvi.fontnames == \
+        {'cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7'}  # a set
+    vf = dr.Vf(os.path.join(dir, 'virtual.vf'))
+    assert vf.fontnames == {'cmex10', 'cmr10'}  # sets: order is irrelevant
 
 
 def test_dviread_get_fonts_error_handling():