Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b85e958

Browse files
authored
Merge pull request #19538 from anntzer/psfontsmap
Speedup pdftex.map parsing.
2 parents d7c70bb + 04d28e9 commit b85e958

File tree

3 files changed

+89
-97
lines changed

3 files changed

+89
-97
lines changed

lib/matplotlib/dviread.py

Lines changed: 79 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -828,24 +828,30 @@ class PsfontsMap:
828828
{'slant': 0.16700000000000001}
829829
>>> entry.filename
830830
"""
831-
__slots__ = ('_font', '_filename')
831+
__slots__ = ('_filename', '_unparsed', '_parsed')
832832

833833
# Create a filename -> PsfontsMap cache, so that calling
834834
# `PsfontsMap(filename)` with the same filename a second time immediately
835835
# returns the same object.
836836
@lru_cache()
837837
def __new__(cls, filename):
838838
self = object.__new__(cls)
839-
self._font = {}
840839
self._filename = os.fsdecode(filename)
840+
# Some TeX distributions have enormous pdftex.map files which would
841+
# take hundreds of milliseconds to parse, but it is easy enough to just
842+
# store the unparsed lines (keyed by the first word, which is the
843+
# texname) and parse them on-demand.
841844
with open(filename, 'rb') as file:
842-
self._parse(file)
845+
self._unparsed = {line.split(b' ', 1)[0]: line for line in file}
846+
self._parsed = {}
843847
return self
844848

845849
def __getitem__(self, texname):
846850
assert isinstance(texname, bytes)
851+
if texname in self._unparsed:
852+
self._parse_and_cache_line(self._unparsed.pop(texname))
847853
try:
848-
result = self._font[texname]
854+
return self._parsed[texname]
849855
except KeyError:
850856
fmt = ('A PostScript file for the font whose TeX name is "{0}" '
851857
'could not be found in the file "{1}". The dviread module '
@@ -854,100 +860,83 @@ def __getitem__(self, texname):
854860
'This problem can often be solved by installing '
855861
'a suitable PostScript font package in your (TeX) '
856862
'package manager.')
857-
msg = fmt.format(texname.decode('ascii'), self._filename)
858-
msg = textwrap.fill(msg, break_on_hyphens=False,
859-
break_long_words=False)
860-
_log.info(msg)
863+
_log.info(textwrap.fill(
864+
fmt.format(texname.decode('ascii'), self._filename),
865+
break_on_hyphens=False, break_long_words=False))
861866
raise
862-
fn, enc = result.filename, result.encoding
863-
if fn is not None and not fn.startswith(b'/'):
864-
fn = find_tex_file(fn)
865-
if enc is not None and not enc.startswith(b'/'):
866-
enc = find_tex_file(result.encoding)
867-
return result._replace(filename=fn, encoding=enc)
868-
869-
def _parse(self, file):
870-
"""
871-
Parse the font mapping file.
872-
873-
The format is, AFAIK: texname fontname [effects and filenames]
874-
Effects are PostScript snippets like ".177 SlantFont",
875-
filenames begin with one or two less-than signs. A filename
876-
ending in enc is an encoding file, other filenames are font
877-
files. This can be overridden with a left bracket: <[foobar
878-
indicates an encoding file named foobar.
879867

880-
There is some difference between <foo.pfb and <<bar.pfb in
881-
subsetting, but I have no example of << in my TeX installation.
868+
def _parse_and_cache_line(self, line):
869+
"""
870+
Parse a line in the font mapping file.
871+
872+
The format is (partially) documented at
873+
http://mirrors.ctan.org/systems/doc/pdftex/manual/pdftex-a.pdf
874+
https://tug.org/texinfohtml/dvips.html#psfonts_002emap
875+
Each line can have the following fields:
876+
877+
- tfmname (first, only required field),
878+
- psname (defaults to tfmname, must come immediately after tfmname if
879+
present),
880+
- fontflags (integer, must come immediately after psname if present,
881+
ignored by us),
882+
- special (SlantFont and ExtendFont, only field that is double-quoted),
883+
- fontfile, encodingfile (optional, prefixed by <, <<, or <[; << always
884+
precedes a font, <[ always precedes an encoding, < can precede either
885+
but then an encoding file must have extension .enc; < and << also
886+
request different font subsetting behaviors but we ignore that; < can
887+
be separated from the filename by whitespace).
888+
889+
special, fontfile, and encodingfile can appear in any order.
882890
"""
883891
# If the map file specifies multiple encodings for a font, we
884892
# follow pdfTeX in choosing the last one specified. Such
885893
# entries are probably mistakes but they have occurred.
886894
# http://tex.stackexchange.com/questions/10826/
887-
# http://article.gmane.org/gmane.comp.tex.pdftex/4914
888-
889-
empty_re = re.compile(br'%|\s*$')
890-
word_re = re.compile(
891-
br'''(?x) (?:
892-
"<\[ (?P<enc1> [^"]+ )" | # quoted encoding marked by [
893-
"< (?P<enc2> [^"]+.enc)" | # quoted encoding, ends in .enc
894-
"<<? (?P<file1> [^"]+ )" | # quoted font file name
895-
" (?P<eff1> [^"]+ )" | # quoted effects or font name
896-
<\[ (?P<enc3> \S+ ) | # encoding marked by [
897-
< (?P<enc4> \S+ .enc) | # encoding, ends in .enc
898-
<<? (?P<file2> \S+ ) | # font file name
899-
(?P<eff2> \S+ ) # effects or font name
900-
)''')
901-
effects_re = re.compile(
902-
br'''(?x) (?P<slant> -?[0-9]*(?:\.[0-9]+)) \s* SlantFont
903-
| (?P<extend>-?[0-9]*(?:\.[0-9]+)) \s* ExtendFont''')
904-
905-
lines = (line.strip()
906-
for line in file
907-
if not empty_re.match(line))
908-
for line in lines:
909-
effects, encoding, filename = b'', None, None
910-
words = word_re.finditer(line)
911-
912-
# The named groups are mutually exclusive and are
913-
# referenced below at an estimated order of probability of
914-
# occurrence based on looking at my copy of pdftex.map.
915-
# The font names are probably unquoted:
916-
w = next(words)
917-
texname = w.group('eff2') or w.group('eff1')
918-
w = next(words)
919-
psname = w.group('eff2') or w.group('eff1')
920-
921-
for w in words:
922-
# Any effects are almost always quoted:
923-
eff = w.group('eff1') or w.group('eff2')
924-
if eff:
925-
effects = eff
926-
continue
927-
# Encoding files usually have the .enc suffix
928-
# and almost never need quoting:
929-
enc = (w.group('enc4') or w.group('enc3') or
930-
w.group('enc2') or w.group('enc1'))
931-
if enc:
932-
if encoding is not None:
933-
_log.debug('Multiple encodings for %s = %s',
934-
texname, psname)
935-
encoding = enc
936-
continue
937-
# File names are probably unquoted:
938-
filename = w.group('file2') or w.group('file1')
939-
940-
effects_dict = {}
941-
for match in effects_re.finditer(effects):
942-
slant = match.group('slant')
943-
if slant:
944-
effects_dict['slant'] = float(slant)
945-
else:
946-
effects_dict['extend'] = float(match.group('extend'))
947895

948-
self._font[texname] = PsFont(
949-
texname=texname, psname=psname, effects=effects_dict,
950-
encoding=encoding, filename=filename)
896+
if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
897+
return
898+
tfmname = basename = special = encodingfile = fontfile = None
899+
matches = re.finditer(br'"([^"]*)(?:"|$)|(\S+)', line)
900+
for match in matches:
901+
quoted, unquoted = match.groups()
902+
if unquoted:
903+
if unquoted.startswith(b"<<"): # font
904+
fontfile = unquoted[2:]
905+
elif unquoted.startswith(b"<["): # encoding
906+
encodingfile = unquoted[2:]
907+
elif unquoted.startswith(b"<"): # font or encoding
908+
word = (
909+
# <foo => foo
910+
unquoted[1:]
911+
# < by itself => read the next word
912+
or next(filter(None, next(matches).groups())))
913+
if word.endswith(b".enc"):
914+
encodingfile = word
915+
else:
916+
fontfile = word
917+
elif tfmname is None:
918+
tfmname = unquoted
919+
elif basename is None:
920+
basename = unquoted
921+
elif quoted:
922+
special = quoted
923+
if basename is None:
924+
basename = tfmname
925+
effects = {}
926+
if special:
927+
words = reversed(special.split())
928+
for word in words:
929+
if word == b"SlantFont":
930+
effects["slant"] = float(next(words))
931+
elif word == b"ExtendFont":
932+
effects["extend"] = float(next(words))
933+
if encodingfile is not None and not encodingfile.startswith(b"/"):
934+
encodingfile = find_tex_file(encodingfile)
935+
if fontfile is not None and not fontfile.startswith(b"/"):
936+
fontfile = find_tex_file(fontfile)
937+
self._parsed[tfmname] = PsFont(
938+
texname=tfmname, psname=basename, effects=effects,
939+
encoding=encodingfile, filename=fontfile)
951940

952941

953942
# Note: this function should ultimately replace the Encoding class, which
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
% used by test_dviread.py
2-
TeXfont1 PSfont1 <font1.pfb "<font1.enc"
3-
TeXfont2 PSfont2 <font2.enc "<font2.pfa"
4-
"TeXfont3" PSfont3 "1.23 UnknownEffect" <[enc3.foo <font3.pfa
2+
TeXfont1 PSfont1 <font1.pfb <font1.enc
3+
TeXfont2 PSfont2 <font2.enc <font2.pfa
4+
TeXfont3 PSfont3 "1.23 UnknownEffect" <[enc3.foo < font3.pfa
55
TeXfont4 PSfont4 "-0.1 SlantFont 2.2 ExtendFont" <font4.enc <font4.pfa
6-
TeXfont5 "PSfont5" <encoding1.enc <encoding2.enc <font5.pfb
6+
TeXfont5 PSfont5 <encoding1.enc <encoding2.enc <font5.pfb
77
TeXfont6 PSfont6
8-
TeXfont7 PSfont7 <font7.enc
9-
TeXfont8 PSfont8 <font8.pfb
10-
TeXfont9 PSfont9 </absolute/font9.pfb
8+
TeXfont7 PSfont7 < font7.enc
9+
TeXfont8 PSfont8 <<font8.pfb
10+
TeXfont9 </absolute/font9.pfb

lib/matplotlib/tests/test_dviread.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,13 @@ def test_PsfontsMap(monkeypatch):
4242
assert entry.filename == b'font8.pfb'
4343
assert entry.encoding is None
4444
entry = fontmap[b'TeXfont9']
45+
assert entry.psname == b'TeXfont9'
4546
assert entry.filename == b'/absolute/font9.pfb'
4647
# Missing font
4748
with pytest.raises(KeyError, match='no-such-font'):
4849
fontmap[b'no-such-font']
50+
with pytest.raises(KeyError, match='%'):
51+
fontmap[b'%']
4952

5053

5154
@pytest.mark.skipif(shutil.which("kpsewhich") is None,

0 commit comments

Comments
 (0)