@@ -828,24 +828,30 @@ class PsfontsMap:
828
828
{'slant': 0.16700000000000001}
829
829
>>> entry.filename
830
830
"""
831
- __slots__ = ('_font ' , '_filename ' )
831
+ __slots__ = ('_filename ' , '_unparsed' , '_parsed ' )
832
832
833
833
# Create a filename -> PsfontsMap cache, so that calling
834
834
# `PsfontsMap(filename)` with the same filename a second time immediately
835
835
# returns the same object.
836
836
@lru_cache()
def __new__(cls, filename):
    """
    Return the map object for *filename*, reading the file only once.

    Because of the `lru_cache` decorator, calling this again with the same
    *cls* and *filename* returns the very same object.

    Parameters
    ----------
    filename : str or path-like
        Path of the font map file (e.g. ``pdftex.map``); it is opened in
        binary mode.

    Raises
    ------
    OSError
        If *filename* cannot be opened.
    """
    self = object.__new__(cls)
    self._filename = os.fsdecode(filename)
    # Some TeX distributions have enormous pdftex.map files which would
    # take hundreds of milliseconds to parse, but it is easy enough to just
    # store the unparsed lines (keyed by the first word, which is the
    # texname) and parse them on-demand.
    unparsed = {}
    with open(filename, 'rb') as file:
        for line in file:
            # Split on any whitespace rather than on a single space, so
            # that a line holding only the texname (the key must not keep
            # the trailing newline) or using a tab as separator is still
            # reachable by its texname.
            words = line.split(None, 1)
            # Blank lines carry no texname; lines starting with a space
            # are ignored by the line parser, so they must not replace a
            # real entry that has the same first word.
            if not words or line.startswith(b" "):
                continue
            # A later line for the same texname replaces an earlier one.
            unparsed[words[0]] = line
    self._unparsed = unparsed
    self._parsed = {}
    return self
844
848
845
849
def __getitem__ (self , texname ):
846
850
assert isinstance (texname , bytes )
851
+ if texname in self ._unparsed :
852
+ self ._parse_and_cache_line (self ._unparsed .pop (texname ))
847
853
try :
848
- result = self ._font [texname ]
854
+ return self ._parsed [texname ]
849
855
except KeyError :
850
856
fmt = ('A PostScript file for the font whose TeX name is "{0}" '
851
857
'could not be found in the file "{1}". The dviread module '
@@ -854,100 +860,83 @@ def __getitem__(self, texname):
854
860
'This problem can often be solved by installing '
855
861
'a suitable PostScript font package in your (TeX) '
856
862
'package manager.' )
857
- msg = fmt .format (texname .decode ('ascii' ), self ._filename )
858
- msg = textwrap .fill (msg , break_on_hyphens = False ,
859
- break_long_words = False )
860
- _log .info (msg )
863
+ _log .info (textwrap .fill (
864
+ fmt .format (texname .decode ('ascii' ), self ._filename ),
865
+ break_on_hyphens = False , break_long_words = False ))
861
866
raise
862
- fn , enc = result .filename , result .encoding
863
- if fn is not None and not fn .startswith (b'/' ):
864
- fn = find_tex_file (fn )
865
- if enc is not None and not enc .startswith (b'/' ):
866
- enc = find_tex_file (result .encoding )
867
- return result ._replace (filename = fn , encoding = enc )
868
-
869
def _parse_and_cache_line(self, line):
    """
    Parse a line in the font mapping file.

    The format is (partially) documented at
    http://mirrors.ctan.org/systems/doc/pdftex/manual/pdftex-a.pdf
    https://tug.org/texinfohtml/dvips.html#psfonts_002emap
    Each line can have the following fields:

    - tfmname (first, only required field),
    - psname (defaults to tfmname, must come immediately after tfmname if
      present),
    - fontflags (integer, must come immediately after psname if present,
      ignored by us),
    - special (SlantFont and ExtendFont, only field that is double-quoted),
    - fontfile, encodingfile (optional, prefixed by <, <<, or <[; << always
      precedes a font, <[ always precedes an encoding, < can precede either
      but then an encoding file must have extension .enc; < and << also
      request different font subsetting behaviors but we ignore that; the
      prefix can be separated from the filename by whitespace).

    special, fontfile, and encodingfile can appear in any order.

    Parameters
    ----------
    line : bytes
        One raw line of the map file.

    Returns
    -------
    None
        On success a `PsFont` is stored in ``self._parsed`` under its
        tfmname.  Nothing is stored for an empty line, for a line starting
        with a space or one of ``% * ; #``, or for a line without a
        tfmname.
    """
    # If the map file specifies multiple encodings for a font, we
    # follow pdfTeX in choosing the last one specified. Such
    # entries are probably mistakes but they have occurred.
    # http://tex.stackexchange.com/questions/10826/

    if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
        return
    tfmname = basename = special = encodingfile = fontfile = None
    matches = re.finditer(br'"([^"]*)(?:"|$)|(\S+)', line)
    for match in matches:
        quoted, unquoted = match.groups()
        if unquoted:
            if unquoted.startswith(b"<"):
                if unquoted.startswith((b"<<", b"<[")):
                    marker, word = unquoted[:2], unquoted[2:]
                else:
                    marker, word = unquoted[:1], unquoted[1:]
                if not word:
                    # Marker by itself => the filename is the next word
                    # (quoted or not).  Supplying a default keeps a
                    # dangling marker at the end of the line from leaking
                    # a StopIteration out of this method.
                    following = next(matches, None)
                    if following is not None:
                        word = following.group(1) or following.group(2)
                if not word:
                    continue  # Dangling marker: no filename to record.
                if marker == b"<[" or (
                        marker == b"<" and word.endswith(b".enc")):
                    encodingfile = word
                else:
                    fontfile = word
            elif tfmname is None:
                tfmname = unquoted
            elif basename is None:
                basename = unquoted
            # Any further bare word (e.g. fontflags) is ignored.
        elif quoted:
            special = quoted
    if tfmname is None:
        # Blank line, or a line made only of quoted/"<" fields: there is
        # no texname to file the entry under, so do not cache anything.
        return
    if basename is None:
        basename = tfmname
    effects = {}
    if special:
        # Walk the words backwards so that the word following a keyword in
        # this order is the number written just before it in the file.
        words = reversed(special.split())
        for word in words:
            if word == b"SlantFont":
                effect = "slant"
            elif word == b"ExtendFont":
                effect = "extend"
            else:
                continue
            try:
                effects[effect] = float(next(words))
            except (StopIteration, ValueError):
                # Keyword without a usable number: skip just this effect
                # instead of failing the whole font lookup.
                _log.debug("Ignoring malformed %r effect for font %r",
                           word, tfmname)
    if encodingfile is not None and not encodingfile.startswith(b"/"):
        encodingfile = find_tex_file(encodingfile)
    if fontfile is not None and not fontfile.startswith(b"/"):
        fontfile = find_tex_file(fontfile)
    self._parsed[tfmname] = PsFont(
        texname=tfmname, psname=basename, effects=effects,
        encoding=encodingfile, filename=fontfile)
951
940
952
941
953
942
# Note: this function should ultimately replace the Encoding class, which
0 commit comments