3030
3131import numpy as np
3232
33- from matplotlib import _api , cbook
33+ from matplotlib import _api , cbook , textpath
34+ from matplotlib .ft2font import FT2Font , LoadFlags
3435
3536_log = logging .getLogger (__name__ )
3637
@@ -106,18 +107,27 @@ def font_effects(self):
@property
def glyph_name_or_index(self):
    """
    The glyph name, the native charmap glyph index, or the raw glyph index.

    For a TrueType font (which can currently only happen for DVI files
    generated by xetex or luatex), the value is the raw glyph index, which
    can be passed to FT_Load_Glyph/load_glyph.

    Otherwise the font is a PostScript font.  If its :file:`pdftex.map`
    entry specifies an encoding, that encoding maps glyph indices to Adobe
    glyph names, and this property uses it to convert the dvi number to a
    glyph name.  Callers can then convert the name to a glyph index (with
    FT_Get_Name_Index/get_name_index) and load the glyph using
    FT_Load_Glyph/load_glyph.

    If the :file:`pdftex.map` entry specifies no encoding, the value is an
    index into the font's "native" charmap; glyphs should be loaded
    directly using FT_Load_Char/load_char after selecting the native
    charmap.
    """
    # TODO: The last section is only true on luatex since luaotfload 3.15;
    # add a version check in the tex file generated by texmanager.
    # NOTE(review): this body always consults the pdftex.map entry; how the
    # TrueType case described above reaches here is not visible in this
    # block -- confirm against _get_pdftexmap_entry.
    encoding = self._get_pdftexmap_entry().encoding
    if encoding is None:
        return self.glyph
    return _parse_enc(encoding)[self.glyph]
@@ -399,7 +409,7 @@ def _put_char_real(self, char):
399409 scale = font ._scale
400410 for x , y , f , g , w in font ._vf [char ].text :
401411 newf = DviFont (scale = _mul1220 (scale , f ._scale ),
402- tfm = f ._tfm , texname = f .texname , vf = f ._vf )
412+ metrics = f ._metrics , texname = f .texname , vf = f ._vf )
403413 self .text .append (Text (self .h + _mul1220 (x , scale ),
404414 self .v + _mul1220 (y , scale ),
405415 newf , g , newf ._width_of (g )))
@@ -495,7 +505,27 @@ def _fnt_def(self, k, c, s, d, a, l):
495505
496506 def _fnt_def_real (self , k , c , s , d , a , l ):
497507 n = self .file .read (a + l )
498- fontname = n [- l :].decode ('ascii' )
508+ fontname = n [- l :].decode ("ascii" )
509+ # Note that checksum seems wrong?
510+ if fontname .startswith ("[" ):
511+ path , sep , rest = fontname [1 :].rpartition ("]" )
512+ if not sep or rest [:1 ] not in ["" , ":" ]:
513+ raise ValueError (f"Invalid modern font name: { fontname } " )
514+ flags = {} # TODO: Actually record these flags.
515+ if rest [1 :]:
516+ for kv in rest [1 :].split (";" ):
517+ k , v = kv .split ("=" , 1 )
518+ if k == "index" :
519+ if v != 0 :
520+ raise NotImplementedError (
521+ "Indexing TTC fonts is not supported yet" )
522+ elif k in ["embolden" , "slant" , "extend" ]:
523+ flags [k ] = int (v ) / 65536
524+ else :
525+ _log .warning ("Ignoring invalid key-value pair: %r" , kv )
526+ metrics = TtfMetrics (path )
527+ self .fonts [k ] = DviFont (scale = s , metrics = metrics , texname = n , vf = None )
528+ return
499529 try :
500530 tfm = _tfmfile (fontname )
501531 except FileNotFoundError as exc :
@@ -512,12 +542,12 @@ def _fnt_def_real(self, k, c, s, d, a, l):
512542 vf = _vffile (fontname )
513543 except FileNotFoundError :
514544 vf = None
515- self .fonts [k ] = DviFont (scale = s , tfm = tfm , texname = n , vf = vf )
545+ self .fonts [k ] = DviFont (scale = s , metrics = tfm , texname = n , vf = vf )
516546
517547 @_dispatch (247 , state = _dvistate .pre , args = ('u1' , 'u4' , 'u4' , 'u4' , 'u1' ))
518548 def _pre (self , i , num , den , mag , k ):
519549 self .file .read (k ) # comment in the dvi file
520- if i != 2 :
550+ if i not in [ 2 , 7 ]: # 2: pdftex, luatex; 7: xetex
521551 raise ValueError (f"Unknown dvi format { i } " )
522552 if num != 25400000 or den != 7227 * 2 ** 16 :
523553 raise ValueError ("Nonstandard units in dvi file" )
@@ -538,13 +568,70 @@ def _post(self, _):
538568 # TODO: actually read the postamble and finale?
539569 # currently post_post just triggers closing the file
540570
@_dispatch(249, args=())
def _post_post(self):
    """Handler registered for opcode 249; deliberately unimplemented."""
    raise NotImplementedError
574+
@_dispatch(250, args=())
def _begin_reflect(self):
    """Handler registered for opcode 250; reflection is not implemented."""
    raise NotImplementedError
544578
545- @_dispatch (min = 250 , max = 255 )
546- def _malformed (self , offset ):
547- raise ValueError (f"unknown command: byte { 250 + offset } " )
@_dispatch(251, args=())
def _end_reflect(self):
    """Handler registered for opcode 251; reflection is not implemented."""
    raise NotImplementedError
582+
@_dispatch(252, args=())
def _define_native_font(self):
    """
    Handle opcode 252: register a font that is loaded from a font file.

    Reads, in order: a 4-byte font number, a 4-byte scale, 2 bytes of
    flags, a 1-byte name length followed by that many name bytes, and a
    4-byte face index.  Depending on the flags, further optional fields
    follow and are consumed but not yet recorded.

    Raises
    ------
    NotImplementedError
        If the face index is nonzero.
    """
    def read_unsigned(nbytes):
        return self._read_arg(nbytes, signed=False)

    fontnum = read_unsigned(4)
    scale = read_unsigned(4)
    flags = read_unsigned(2)
    name_length = read_unsigned(1)
    path = self.file.read(name_length)
    face_index = read_unsigned(4)
    # TODO: Actually record these flags.
    # The optional fields are still read so that the file position stays
    # in sync with the command stream.
    if flags & 0x0200:
        for _ in range(4):  # rgba, one byte per channel
            read_unsigned(1)
    # extend, slant, embolden (in that order): signed 4-byte values that
    # represent multiples of 1/65536.
    for mask in (0x1000, 0x2000, 0x4000):
        if flags & mask:
            self._read_arg(4, signed=True)
    if face_index:
        raise NotImplementedError("Indexing TTC fonts is not supported yet")
    metrics = TtfMetrics(path)
    self.fonts[fontnum] = DviFont(
        scale=scale, metrics=metrics, texname=b"[" + path + b"]", vf=None)
605+
def _read_and_place_glyphs(self):
    """
    Read a glyph run from the file and append its glyphs to ``self.text``.

    The run consists of a 4-byte unsigned total advance, a 2-byte unsigned
    glyph count *k*, then ``2 * k`` signed 4-byte offsets (x, y pairs) and
    *k* unsigned 2-byte glyph numbers.  Each glyph is placed at the current
    position plus its offset, using the currently selected font; afterwards
    the horizontal position is advanced by the total advance.

    Shared by `_set_glyphs` and `_set_text_and_glyphs`, whose payloads end
    with the same glyph-run layout.
    """
    advance = self._read_arg(4, signed=False)
    count = self._read_arg(2, signed=False)
    offsets = [self._read_arg(4, signed=True) for _ in range(2 * count)]
    glyphs = [self._read_arg(2, signed=False) for _ in range(count)]
    font = self.fonts[self.f]
    for i, glyph in enumerate(glyphs):
        self.text.append(Text(self.h + offsets[2 * i],
                              self.v + offsets[2 * i + 1],
                              font, glyph, font._width_of(glyph)))
    self.h += advance

@_dispatch(253, args=())
def _set_glyphs(self):
    """Handle opcode 253: place a run of glyphs given by glyph number."""
    self._read_and_place_glyphs()

@_dispatch(254, args=())
def _set_text_and_glyphs(self):
    """
    Handle opcode 254: like opcode 253, preceded by the run's utf16 text.

    The text (a 2-byte unsigned length followed by two bytes per unit) is
    skipped; only the glyph run that follows is used.
    """
    text_length = self._read_arg(2, signed=False)
    self.file.read(2 * text_length)  # utf16 text, currently unused
    self._read_and_place_glyphs()
631+
@_dispatch(255)
def _malformed(self, raw):
    """Reject opcode 255, which has no defined meaning here."""
    # *raw* is whatever the dispatcher passes for this opcode; it is unused.
    raise ValueError("unknown command: byte 255")
548635
549636
550637class DviFont :
@@ -562,7 +649,7 @@ class DviFont:
562649 ----------
563650 scale : float
564651 Factor by which the font is scaled from its natural size.
565- tfm : Tfm
652+ metrics : Tfm | TtfMetrics
566653 TeX font metrics for this font
567654 texname : bytes
568655 Name of the font as used internally by TeX and friends, as an ASCII
@@ -578,12 +665,12 @@ class DviFont:
578665 Size of the font in Adobe points, converted from the slightly
579666 smaller TeX points.
580667 """
581- __slots__ = ('texname' , 'size' , '_scale' , '_vf' , '_tfm ' )
668+ __slots__ = ('texname' , 'size' , '_scale' , '_vf' , '_metrics ' )
582669
583- def __init__ (self , scale , tfm , texname , vf ):
670+ def __init__ (self , scale , metrics , texname , vf ):
584671 _api .check_isinstance (bytes , texname = texname )
585672 self ._scale = scale
586- self ._tfm = tfm
673+ self ._metrics = metrics
587674 self .texname = texname
588675 self ._vf = vf
589676 self .size = scale * (72.0 / (72.27 * 2 ** 16 ))
@@ -604,32 +691,30 @@ def __repr__(self):
604691
def _width_of(self, char):
    """Return the width of *char* in dvi units, or 0 if it has no metrics."""
    whd = self._metrics.get_metrics(char)
    if whd is not None:
        return _mul1220(whd.width, self._scale)
    _log.debug('No width for char %d in font %s.', char, self.texname)
    return 0
612699
def _height_depth_of(self, char):
    """
    Return ``[height, depth]`` of *char* in dvi units.

    Both entries are 0 if the font has no metrics for *char*.
    """
    whd = self._metrics.get_metrics(char)
    if whd is None:
        _log.debug('No metrics for char %d in font %s', char, self.texname)
        return [0, 0]
    height = _mul1220(whd.height, self._scale)
    depth = _mul1220(whd.depth, self._scale)
    # cmsyXX (symbols font) glyph 0 ("minus") has a nonzero descent
    # so that TeX aligns equations properly
    # (https://tex.stackexchange.com/q/526103/)
    # but we actually care about the rasterization depth to align
    # the dvipng-generated images.
    if re.match(br'^cmsy\d+$', self.texname) and char == 0:
        depth = 0
    return [height, depth]
633718
634719
635720class Vf (Dvi ):
@@ -761,6 +846,9 @@ def _mul1220(num1, num2):
761846 return (num1 * num2 ) >> 20
762847
763848
# Per-glyph metrics record (width, height, depth) returned by the
# ``get_metrics`` methods of the metrics classes in this module.
WHD = namedtuple("WHD", "width height depth")
850+
851+
764852class Tfm :
765853 """
766854 A TeX Font Metric file.
@@ -783,7 +871,7 @@ class Tfm:
783871 specified in the dvi file. These are dicts because indexing may
784872 not start from 0.
785873 """
786- __slots__ = ('checksum' , 'design_size' , 'width ' , 'height' , 'depth ' )
874+ __slots__ = ('checksum' , 'design_size' , '_whds ' , 'widths ' )
787875
788876 def __init__ (self , filename ):
789877 _log .debug ('opening tfm file %s' , filename )
@@ -799,15 +887,42 @@ def __init__(self, filename):
799887 widths = struct .unpack (f'!{ nw } i' , file .read (4 * nw ))
800888 heights = struct .unpack (f'!{ nh } i' , file .read (4 * nh ))
801889 depths = struct .unpack (f'!{ nd } i' , file .read (4 * nd ))
802- self .width = {}
803- self .height = {}
804- self .depth = {}
890+ self ._whds = {}
805891 for idx , char in enumerate (range (bc , ec + 1 )):
806892 byte0 = char_info [4 * idx ]
807893 byte1 = char_info [4 * idx + 1 ]
808- self .width [char ] = widths [byte0 ]
809- self .height [char ] = heights [byte1 >> 4 ]
810- self .depth [char ] = depths [byte1 & 0xf ]
894+ self ._whds [char ] = WHD (
895+ widths [byte0 ], heights [byte1 >> 4 ], depths [byte1 & 0xf ])
896+ self .widths = [(1000 * self ._whds [c ].width if c in self ._whds else 0 ) >> 20
897+ for c in range (max (self ._whds ))] if self ._whds else []
898+
def get_metrics(self, char):
    """
    Return the metrics recorded for *char*, or None if there are none.

    Returning None (rather than raising KeyError) matches what the callers
    in this file expect: they test the result with ``is None`` and fall
    back to zero metrics.
    """
    return self._whds.get(char)
901+
# Deprecated dict views kept for backward compatibility; each maps a
# character code to one field of its metrics record.  The dict must be
# iterated with .items(): iterating it directly yields only the int keys,
# which cannot be unpacked into (char, metrics).
width = _api.deprecated("3.11")(
    property(lambda self: {c: m.width for c, m in self._whds.items()}))
height = _api.deprecated("3.11")(
    property(lambda self: {c: m.height for c, m in self._whds.items()}))
depth = _api.deprecated("3.11")(
    property(lambda self: {c: m.depth for c, m in self._whds.items()}))
908+
909+
class TtfMetrics:
    """
    Glyph metrics read from a font file through `FT2Font`.

    Offers the same ``get_metrics`` entry point as `Tfm`, so that either
    can serve as the metrics source of a `DviFont`.
    """

    def __init__(self, filename):
        # The face is never explicitly closed here (open question: manage
        # closing?).
        self._face = FT2Font(filename, hinting_factor=1)

    def get_metrics(self, char):
        """
        Return the `WHD` of glyph number *char*.

        The unscaled glyph metrics are divided by the font's units per EM
        and expressed as integers with 20 fractional bits.
        """
        # _mul1220 uses a truncating bitshift for compatibility with
        # dvitype, but the truncation rules still need to be figured out
        # when upem is 1000 (e.g. lmroman10-regular.otf) and the metrics are
        # therefore not exactly representable with 20 fractional bits.  For
        # now, just truncate during the conversion as well.  (When upem is
        # 2048 the conversion is exact and the truncation does nothing.)
        upem = self._face.units_per_EM  # Usually 2048 or 1000.
        glyph = self._face.load_glyph(char, LoadFlags.NO_SCALE)

        def to_fixed(font_units):
            return int(font_units / upem * 2**20)

        return WHD(to_fixed(glyph.horiAdvance),
                   to_fixed(glyph.height),
                   to_fixed(glyph.height - glyph.horiBearingY))
811926
812927
813928PsFont = namedtuple ('PsFont' , 'texname psname effects encoding filename' )
@@ -1002,8 +1117,7 @@ def _parse_enc(path):
10021117 Returns
10031118 -------
10041119 list
1005- The nth entry of the list is the PostScript glyph name of the nth
1006- glyph.
1120+ The nth list item is the PostScript glyph name of the nth glyph.
10071121 """
10081122 no_comments = re .sub ("%.*" , "" , Path (path ).read_text (encoding = "ascii" ))
10091123 array = re .search (r"(?s)\[(.*)\]" , no_comments ).group (1 )
@@ -1108,26 +1222,45 @@ def _fontfile(cls, suffix, texname):
11081222 from argparse import ArgumentParser
11091223 import itertools
11101224
1225+ import fontTools .agl
1226+
11111227 parser = ArgumentParser ()
11121228 parser .add_argument ("filename" )
11131229 parser .add_argument ("dpi" , nargs = "?" , type = float , default = None )
11141230 args = parser .parse_args ()
11151231 with Dvi (args .filename , args .dpi ) as dvi :
11161232 fontmap = PsfontsMap (find_tex_file ('pdftex.map' ))
11171233 for page in dvi :
1118- print (f"=== new page === "
1234+ print (f"=== NEW PAGE === "
11191235 f"(w: { page .width } , h: { page .height } , d: { page .descent } )" )
1120- for font , group in itertools .groupby (
1121- page .text , lambda text : text .font ):
1122- print (f"font: { font .texname .decode ('latin-1' )!r} \t "
1123- f"scale: { font ._scale / 2 ** 20 } " )
1124- print ("x" , "y" , "glyph" , "chr" , "w" , "(glyphs)" , sep = "\t " )
1236+ print ("--- GLYPHS ---" )
1237+ for font , group in itertools .groupby (page .text , lambda text : text .font ):
1238+ font_name = font .texname .decode ("latin-1" )
1239+ filename = (font_name [1 :- 1 ] if font_name .startswith ("[" )
1240+ else fontmap [font .texname ].filename )
1241+ if font_name .startswith ("[" ):
1242+ print (f"font: { font_name } " )
1243+ else :
1244+ print (f"font: { font_name } at { filename } " )
1245+ print (f"scale: { font ._scale / 2 ** 20 } " )
1246+ print (" " .join (map ("{:>11}" .format , ["x" , "y" , "glyph" , "chr" , "w" ])))
1247+ face = FT2Font (filename )
11251248 for text in group :
1126- print (text .x , text .y , text .glyph ,
1127- chr (text .glyph ) if chr (text .glyph ).isprintable ()
1128- else "." ,
1129- text .width , sep = "\t " )
1249+ if font_name .startswith ("[" ):
1250+ glyph_name = face .get_glyph_name (text .glyph )
1251+ else :
1252+ if isinstance (text .glyph_name_or_index , str ):
1253+ glyph_name = text .glyph_name_or_index
1254+ else :
1255+ textpath .TextToPath ._select_native_charmap (face )
1256+ glyph_name = face .get_glyph_name (
1257+ face .get_char_index (text .glyph ))
1258+ glyph_str = fontTools .agl .toUnicode (glyph_name )
1259+ print (" " .join (map ("{:>11}" .format , [
1260+ text .x , text .y , text .glyph , glyph_str , text .width ])))
11301261 if page .boxes :
1131- print ("x" , "y" , "h" , "w" , "" , "(boxes)" , sep = "\t " )
1262+ print ("--- BOXES ---" )
1263+ print (" " .join (map ("{:>11}" .format , ["x" , "y" , "h" , "w" ])))
11321264 for box in page .boxes :
1133- print (box .x , box .y , box .height , box .width , sep = "\t " )
1265+ print (" " .join (map ("{:>11}" .format , [
1266+ box .x , box .y , box .height , box .width ])))
0 commit comments