Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 254e3df

Browse files
committed
Simplify psfonts.map parsing further
Combine the word splitting and classification in one regex so we only have to scan each line once. Add some quotation marks in the test case to check that we are handling quoted words correctly (the behavior should always have matched this test case).
1 parent 94587b1 commit 254e3df

2 files changed

Lines changed: 60 additions & 52 deletions

File tree

lib/matplotlib/dviread.py

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -868,16 +868,8 @@ def __getitem__(self, texname):
868868
return result._replace(filename=fn, encoding=enc)
869869

870870
def _parse(self, file):
871-
for line in file:
872-
line = line.strip()
873-
if line == b'' or line.startswith(b'%'):
874-
continue
875-
words = [word.strip(b'"') for word in
876-
re.findall(b'("[^"]*"|[^ ]+)', line)]
877-
self._register(words)
878-
879-
def _register(self, words):
880-
"""Register a font described by "words", a sequence of bytestrings.
871+
"""
872+
Parse the font mapping file.
881873
882874
The format is, AFAIK: texname fontname [effects and filenames]
883875
Effects are PostScript snippets like ".177 SlantFont",
@@ -889,52 +881,68 @@ def _register(self, words):
889881
There is some difference between <foo.pfb and <<bar.pfb in
890882
subsetting, but I have no example of << in my TeX installation.
891883
"""
892-
893884
# If the map file specifies multiple encodings for a font, we
894885
# follow pdfTeX in choosing the last one specified. Such
895886
# entries are probably mistakes but they have occurred.
896887
# http://tex.stackexchange.com/questions/10826/
897888
# http://article.gmane.org/gmane.comp.tex.pdftex/4914
898889

899-
texname, psname = words[:2]
900-
words = words[2:]
901-
effects, encoding, filename = b'', None, None
890+
empty_re = re.compile(br'%|\s*$')
891+
word_re = re.compile(
892+
br'''(?x) (?:
893+
"<\[ (?P<enc1> [^"]+ )" | # quoted encoding marked by [
894+
"< (?P<enc2> [^"]+.enc)" | # quoted encoding, ends in .enc
895+
"<<? (?P<file1> [^"]+ )" | # quoted font file name
896+
" (?P<eff1> [^"]+ )" | # quoted effects or font name
897+
<\[ (?P<enc3> \S+ ) | # encoding marked by [
898+
< (?P<enc4> \S+ .enc) | # encoding, ends in .enc
899+
<<? (?P<file2> \S+ ) | # font file name
900+
(?P<eff2> \S+ ) # effects or font name
901+
)''')
902+
effects_re = re.compile(
903+
br'''(?x) (?P<slant> -?[0-9]*(?:\.[0-9]+)) \s* SlantFont
904+
| (?P<extend>-?[0-9]*(?:\.[0-9]+)) \s* ExtendFont''')
905+
906+
lines = (line.strip()
907+
for line in file
908+
if not empty_re.match(line))
909+
for line in lines:
910+
effects, encoding, filename = b'', None, None
911+
words = word_re.finditer(line)
912+
913+
w = next(words)
914+
texname = w.group('eff2') or w.group('eff1')
915+
w = next(words)
916+
psname = w.group('eff2') or w.group('eff1')
917+
918+
for w in words:
919+
eff = w.group('eff1') or w.group('eff2')
920+
if eff:
921+
effects = eff
922+
continue
923+
enc = (w.group('enc4') or w.group('enc3') or
924+
w.group('enc2') or w.group('enc1'))
925+
if enc:
926+
if encoding is not None:
927+
matplotlib.verbose.report(
928+
'Multiple encodings for %s = %s'
929+
% (texname, psname),
930+
'debug')
931+
encoding = enc
932+
continue
933+
filename = w.group('file2') or w.group('file1')
902934

903-
# pick the last non-filename word for effects
904-
effects_words = [word for word in words if not word.startswith(b'<')]
905-
if effects_words:
906-
effects = effects_words[-1]
935+
effects_dict = {}
936+
for match in effects_re.finditer(effects):
937+
slant = match.group('slant')
938+
if slant:
939+
effects_dict['slant'] = float(slant)
940+
else:
941+
effects_dict['extend'] = float(match.group('extend'))
907942

908-
encoding_re = br'<<?(\[.*|.*\.enc)'
909-
encoding_files = [word.lstrip(b'<').lstrip(b'[')
910-
for word in words
911-
if re.match(encoding_re, word)]
912-
if len(encoding_files) > 1:
913-
matplotlib.verbose.report(
914-
'Multiple encodings for %s = %s' % (texname, psname), 'debug')
915-
if encoding_files:
916-
encoding = encoding_files[-1]
917-
918-
font_files = [word.lstrip(b'<')
919-
for word in words
920-
if word.startswith(b'<')
921-
and not re.match(encoding_re, word)]
922-
if font_files:
923-
filename = font_files[-1]
924-
925-
eff = {}
926-
for psword, keyword in ((b'SlantFont', 'slant'),
927-
(b'ExtendFont', 'extend')):
928-
match = re.search(b'([^ ]+) +' + psword, effects)
929-
if match:
930-
try:
931-
eff[keyword] = float(match.group(1))
932-
except ValueError:
933-
pass
934-
935-
self._font[texname] = PsFont(
936-
texname=texname, psname=psname, effects=eff,
937-
encoding=encoding, filename=filename)
943+
self._font[texname] = PsFont(
944+
texname=texname, psname=psname, effects=effects_dict,
945+
encoding=encoding, filename=filename)
938946

939947

940948
class Encoding(object):

lib/matplotlib/tests/baseline_images/dviread/test.map

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
% used by test_dviread.py
2-
TeXfont1 PSfont1 <font1.pfb <font1.enc
3-
TeXfont2 PSfont2 <font2.enc <font2.pfa
4-
TeXfont3 PSfont3 "1.23 UnknownEffect" <[enc3.foo <font3.pfa
2+
TeXfont1 PSfont1 <font1.pfb "<font1.enc"
3+
TeXfont2 PSfont2 <font2.enc "<font2.pfa"
4+
"TeXfont3" PSfont3 "1.23 UnknownEffect" <[enc3.foo <font3.pfa
55
TeXfont4 PSfont4 "-0.1 SlantFont 2.2 ExtendFont" <font4.enc <font4.pfa
6-
TeXfont5 PSfont5 <encoding1.enc <encoding2.enc <font5.pfb
6+
TeXfont5 "PSfont5" <encoding1.enc <encoding2.enc <font5.pfb
77
TeXfont6 PSfont6
88
TeXfont7 PSfont7 <font7.enc
99
TeXfont8 PSfont8 <font8.pfb

0 commit comments

Comments
 (0)