30
30
31
31
import numpy as np
32
32
33
- from matplotlib import _api , cbook
33
+ from matplotlib import _api , cbook , textpath
34
+ from matplotlib .ft2font import FT2Font , LoadFlags
34
35
35
36
_log = logging .getLogger (__name__ )
36
37
@@ -106,18 +107,27 @@ def font_effects(self):
106
107
@property
107
108
def glyph_name_or_index (self ):
108
109
"""
109
- Either the glyph name or the native charmap glyph index.
110
-
111
- If :file:`pdftex.map` specifies an encoding for this glyph's font, that
112
- is a mapping of glyph indices to Adobe glyph names; use it to convert
113
- dvi indices to glyph names. Callers can then convert glyph names to
114
- glyph indices (with FT_Get_Name_Index/get_name_index), and load the
115
- glyph using FT_Load_Glyph/load_glyph.
116
-
117
- If :file:`pdftex.map` specifies no encoding, the indices directly map
118
- to the font's "native" charmap; glyphs should directly load using
119
- FT_Load_Char/load_char after selecting the native charmap.
110
+ The glyph name, the native charmap glyph index, or the raw glyph index.
111
+
112
+ If the font is a TrueType file (which can currently only happen for
113
+ DVI files generated by xetex or luatex), then this number is the raw
114
+ index of the glyph, which can be passed to FT_Load_Glyph/load_glyph.
115
+
116
+ Otherwise, the font is a PostScript font. For such fonts, if
117
+ :file:`pdftex.map` specifies an encoding for this glyph's font,
118
+ that is a mapping of glyph indices to Adobe glyph names, which
119
+ is used by this property to convert dvi numbers to glyph names.
120
+ Callers can then convert glyph names to glyph indices (with
121
+ FT_Get_Name_Index/get_name_index), and load the glyph using
122
+ FT_Load_Glyph/load_glyph.
123
+
124
+ If :file:`pdftex.map` specifies no encoding for a PostScript font,
125
+ this number is an index to the font's "native" charmap; glyphs should
126
+ directly load using FT_Load_Char/load_char after selecting the native
127
+ charmap.
120
128
"""
129
+ # TODO: The last section is only true on luatex since luaotfload 3.15;
130
+ # add a version check in the tex file generated by texmanager.
121
131
entry = self ._get_pdftexmap_entry ()
122
132
return (_parse_enc (entry .encoding )[self .glyph ]
123
133
if entry .encoding is not None else self .glyph )
@@ -399,7 +409,7 @@ def _put_char_real(self, char):
399
409
scale = font ._scale
400
410
for x , y , f , g , w in font ._vf [char ].text :
401
411
newf = DviFont (scale = _mul1220 (scale , f ._scale ),
402
- tfm = f ._tfm , texname = f .texname , vf = f ._vf )
412
+ metrics = f ._metrics , texname = f .texname , vf = f ._vf )
403
413
self .text .append (Text (self .h + _mul1220 (x , scale ),
404
414
self .v + _mul1220 (y , scale ),
405
415
newf , g , newf ._width_of (g )))
@@ -495,7 +505,27 @@ def _fnt_def(self, k, c, s, d, a, l):
495
505
496
506
def _fnt_def_real (self , k , c , s , d , a , l ):
497
507
n = self .file .read (a + l )
498
- fontname = n [- l :].decode ('ascii' )
508
+ fontname = n [- l :].decode ("ascii" )
509
+ # Note that checksum seems wrong?
510
+ if fontname .startswith ("[" ):
511
+ path , sep , rest = fontname [1 :].rpartition ("]" )
512
+ if not sep or rest [:1 ] not in ["" , ":" ]:
513
+ raise ValueError (f"Invalid modern font name: { fontname } " )
514
+ flags = {} # TODO: Actually record these flags.
515
+ if rest [1 :]:
516
+ for kv in rest [1 :].split (";" ):
517
+ key , val = kv .split ("=" , 1 ) # do not rebind k: it is the fnt_def argument used as the self.fonts key below
518
+ if key == "index" :
519
+ if int (val ) != 0 : # val is a str; compare it numerically
520
+ raise NotImplementedError (
521
+ "Indexing TTC fonts is not supported yet" )
522
+ elif key in ["embolden" , "slant" , "extend" ]:
523
+ flags [key ] = int (val ) / 65536
524
+ else :
525
+ _log .warning ("Ignoring invalid key-value pair: %r" , kv )
526
+ metrics = TtfMetrics (path )
527
+ self .fonts [k ] = DviFont (scale = s , metrics = metrics , texname = n , vf = None )
528
+ return
499
529
try :
500
530
tfm = _tfmfile (fontname )
501
531
except FileNotFoundError as exc :
@@ -512,12 +542,12 @@ def _fnt_def_real(self, k, c, s, d, a, l):
512
542
vf = _vffile (fontname )
513
543
except FileNotFoundError :
514
544
vf = None
515
- self .fonts [k ] = DviFont (scale = s , tfm = tfm , texname = n , vf = vf )
545
+ self .fonts [k ] = DviFont (scale = s , metrics = tfm , texname = n , vf = vf )
516
546
517
547
@_dispatch (247 , state = _dvistate .pre , args = ('u1' , 'u4' , 'u4' , 'u4' , 'u1' ))
518
548
def _pre (self , i , num , den , mag , k ):
519
549
self .file .read (k ) # comment in the dvi file
520
- if i != 2 :
550
+ if i not in [ 2 , 7 ]: # 2: pdftex, luatex; 7: xetex
521
551
raise ValueError (f"Unknown dvi format { i } " )
522
552
if num != 25400000 or den != 7227 * 2 ** 16 :
523
553
raise ValueError ("Nonstandard units in dvi file" )
@@ -538,13 +568,70 @@ def _post(self, _):
538
568
# TODO: actually read the postamble and finale?
539
569
# currently post_post just triggers closing the file
540
570
541
- @_dispatch (249 )
542
- def _post_post (self , _ ):
571
+ @_dispatch (249 , args = ())
572
+ def _post_post (self ):
573
+ raise NotImplementedError
574
+
575
+ @_dispatch (250 , args = ())
576
+ def _begin_reflect (self ):
543
577
raise NotImplementedError
544
578
545
- @_dispatch (min = 250 , max = 255 )
546
- def _malformed (self , offset ):
547
- raise ValueError (f"unknown command: byte { 250 + offset } " )
579
+ @_dispatch (251 , args = ())
580
+ def _end_reflect (self ):
581
+ raise NotImplementedError
582
+
583
+ @_dispatch (252 , args = ())
584
+ def _define_native_font (self ):
585
+ k = self ._read_arg (4 , signed = False )
586
+ s = self ._read_arg (4 , signed = False )
587
+ flags = self ._read_arg (2 , signed = False )
588
+ l = self ._read_arg (1 , signed = False )
589
+ n = self .file .read (l )
590
+ i = self ._read_arg (4 , signed = False )
591
+ # TODO: Actually record these flags.
592
+ if flags & 0x0200 :
593
+ rgba = [self ._read_arg (1 , signed = False ) for _ in range (4 )]
594
+ if flags & 0x1000 :
595
+ extend = self ._read_arg (4 , signed = True ) / 65536
596
+ if flags & 0x2000 :
597
+ slant = self ._read_arg (4 , signed = True ) / 65536
598
+ if flags & 0x4000 :
599
+ embolden = self ._read_arg (4 , signed = True ) / 65536
600
+ if i :
601
+ raise NotImplementedError ("Indexing TTC fonts is not supported yet" )
602
+ metrics = TtfMetrics (n )
603
+ self .fonts [k ] = DviFont (
604
+ scale = s , metrics = metrics , texname = b"[" + n + b"]" , vf = None )
605
+
606
+ @_dispatch (253 , args = ())
607
+ def _set_glyphs (self ):
608
+ w = self ._read_arg (4 , signed = False )
609
+ k = self ._read_arg (2 , signed = False )
610
+ xy = [self ._read_arg (4 , signed = True ) for _ in range (2 * k )]
611
+ g = [self ._read_arg (2 , signed = False ) for _ in range (k )]
612
+ font = self .fonts [self .f ]
613
+ for i in range (k ):
614
+ self .text .append (Text (self .h + xy [2 * i ], self .v + xy [2 * i + 1 ],
615
+ font , g [i ], font ._width_of (g [i ])))
616
+ self .h += w
617
+
618
+ @_dispatch (254 , args = ())
619
+ def _set_text_and_glyphs (self ):
620
+ l = self ._read_arg (2 , signed = False )
621
+ t = self .file .read (2 * l ) # utf16
622
+ w = self ._read_arg (4 , signed = False )
623
+ k = self ._read_arg (2 , signed = False )
624
+ xy = [self ._read_arg (4 , signed = True ) for _ in range (2 * k )]
625
+ g = [self ._read_arg (2 , signed = False ) for _ in range (k )]
626
+ font = self .fonts [self .f ]
627
+ for i in range (k ):
628
+ self .text .append (Text (self .h + xy [2 * i ], self .v + xy [2 * i + 1 ],
629
+ font , g [i ], font ._width_of (g [i ])))
630
+ self .h += w
631
+
632
+ @_dispatch (255 )
633
+ def _malformed (self , raw ):
634
+ raise ValueError ("unknown command: byte 255" )
548
635
549
636
550
637
class DviFont :
@@ -562,7 +649,7 @@ class DviFont:
562
649
----------
563
650
scale : float
564
651
Factor by which the font is scaled from its natural size.
565
- tfm : Tfm
652
+ metrics : Tfm | TtfMetrics
566
653
TeX font metrics for this font
567
654
texname : bytes
568
655
Name of the font as used internally by TeX and friends, as an ASCII
@@ -578,12 +665,12 @@ class DviFont:
578
665
Size of the font in Adobe points, converted from the slightly
579
666
smaller TeX points.
580
667
"""
581
- __slots__ = ('texname' , 'size' , '_scale' , '_vf' , '_tfm ' )
668
+ __slots__ = ('texname' , 'size' , '_scale' , '_vf' , '_metrics ' )
582
669
583
- def __init__ (self , scale , tfm , texname , vf ):
670
+ def __init__ (self , scale , metrics , texname , vf ):
584
671
_api .check_isinstance (bytes , texname = texname )
585
672
self ._scale = scale
586
- self ._tfm = tfm
673
+ self ._metrics = metrics
587
674
self .texname = texname
588
675
self ._vf = vf
589
676
self .size = scale * (72.0 / (72.27 * 2 ** 16 ))
@@ -604,32 +691,30 @@ def __repr__(self):
604
691
605
692
def _width_of (self , char ):
606
693
"""Width of char in dvi units."""
607
- width = self ._tfm . width . get (char , None )
608
- if width is not None :
609
- return _mul1220 ( width , self ._scale )
610
- _log . debug ( 'No width for char %d in font %s.' , char , self . texname )
611
- return 0
694
+ metrics = self ._metrics . get_metrics (char )
695
+ if metrics is None :
696
+ _log . debug ( 'No width for char %d in font %s.' , char , self .texname )
697
+ return 0
698
+ return _mul1220 ( metrics . width , self . _scale )
612
699
613
700
def _height_depth_of (self , char ):
614
701
"""Height and depth of char in dvi units."""
615
- result = []
616
- for metric , name in ((self ._tfm .height , "height" ),
617
- (self ._tfm .depth , "depth" )):
618
- value = metric .get (char , None )
619
- if value is None :
620
- _log .debug ('No %s for char %d in font %s' ,
621
- name , char , self .texname )
622
- result .append (0 )
623
- else :
624
- result .append (_mul1220 (value , self ._scale ))
702
+ metrics = self ._metrics .get_metrics (char )
703
+ if metrics is None :
704
+ _log .debug ('No metrics for char %d in font %s' , char , self .texname )
705
+ return [0 , 0 ]
706
+ metrics = [
707
+ _mul1220 (metrics .height , self ._scale ),
708
+ _mul1220 (metrics .depth , self ._scale ),
709
+ ]
625
710
# cmsyXX (symbols font) glyph 0 ("minus") has a nonzero descent
626
711
# so that TeX aligns equations properly
627
712
# (https://tex.stackexchange.com/q/526103/)
628
713
# but we actually care about the rasterization depth to align
629
714
# the dvipng-generated images.
630
715
if re .match (br'^cmsy\d+$' , self .texname ) and char == 0 :
631
- result [- 1 ] = 0
632
- return result
716
+ metrics [- 1 ] = 0
717
+ return metrics
633
718
634
719
635
720
class Vf (Dvi ):
@@ -761,6 +846,9 @@ def _mul1220(num1, num2):
761
846
return (num1 * num2 ) >> 20
762
847
763
848
849
+ WHD = namedtuple ('WHD' , 'width height depth' )
850
+
851
+
764
852
class Tfm :
765
853
"""
766
854
A TeX Font Metric file.
@@ -783,7 +871,7 @@ class Tfm:
783
871
specified in the dvi file. These are dicts because indexing may
784
872
not start from 0.
785
873
"""
786
- __slots__ = ('checksum' , 'design_size' , 'width ' , 'height' , 'depth ' )
874
+ __slots__ = ('checksum' , 'design_size' , '_whds ' , 'widths ' )
787
875
788
876
def __init__ (self , filename ):
789
877
_log .debug ('opening tfm file %s' , filename )
@@ -799,15 +887,42 @@ def __init__(self, filename):
799
887
widths = struct .unpack (f'!{ nw } i' , file .read (4 * nw ))
800
888
heights = struct .unpack (f'!{ nh } i' , file .read (4 * nh ))
801
889
depths = struct .unpack (f'!{ nd } i' , file .read (4 * nd ))
802
- self .width = {}
803
- self .height = {}
804
- self .depth = {}
890
+ self ._whds = {}
805
891
for idx , char in enumerate (range (bc , ec + 1 )):
806
892
byte0 = char_info [4 * idx ]
807
893
byte1 = char_info [4 * idx + 1 ]
808
- self .width [char ] = widths [byte0 ]
809
- self .height [char ] = heights [byte1 >> 4 ]
810
- self .depth [char ] = depths [byte1 & 0xf ]
894
+ self ._whds [char ] = WHD (
895
+ widths [byte0 ], heights [byte1 >> 4 ], depths [byte1 & 0xf ])
896
+ self .widths = [(1000 * self ._whds [c ].width if c in self ._whds else 0 ) >> 20
897
+ for c in range (max (self ._whds ) + 1 )] if self ._whds else [] # + 1 so the highest char code is included
898
+
899
+ def get_metrics (self , char ):
900
+ return self ._whds .get (char ) # None when char is absent; DviFont._width_of/_height_depth_of test for None
901
+
902
+ width = _api .deprecated ("3.11" )(
903
+ property (lambda self : {c : m .width for c , m in self ._whds .items ()}))
904
+ height = _api .deprecated ("3.11" )(
905
+ property (lambda self : {c : m .height for c , m in self ._whds .items ()}))
906
+ depth = _api .deprecated ("3.11" )(
907
+ property (lambda self : {c : m .depth for c , m in self ._whds .items ()}))
908
+
909
+
910
+ class TtfMetrics :
911
+ def __init__ (self , filename ):
912
+ self ._face = FT2Font (filename , hinting_factor = 1 ) # Manage closing?
913
+
914
+ def get_metrics (self , char ):
915
+ # _mul2012 uses a truncating bitshift for compatibility with dvitype,
916
+ # but I still need to figure out truncation rules when upem is 1000
917
+ # (e.g. lmroman10-regular.otf) and thus the metrics themselves are not
918
+ # exactly representable as 20.12 fp. For now, just truncate during
919
+ # conversion to 20.12 as well. (When upem is 2048 the conversion is
920
+ # exact and the truncation does nothing.)
921
+ upem = self ._face .units_per_EM # Usually 2048 or 1000.
922
+ g = self ._face .load_glyph (char , LoadFlags .NO_SCALE )
923
+ return WHD (int (g .horiAdvance / upem * 2 ** 20 ),
924
+ int (g .height / upem * 2 ** 20 ),
925
+ int ((g .height - g .horiBearingY ) / upem * 2 ** 20 ))
811
926
812
927
813
928
PsFont = namedtuple ('PsFont' , 'texname psname effects encoding filename' )
@@ -1002,8 +1117,7 @@ def _parse_enc(path):
1002
1117
Returns
1003
1118
-------
1004
1119
list
1005
- The nth entry of the list is the PostScript glyph name of the nth
1006
- glyph.
1120
+ The nth list item is the PostScript glyph name of the nth glyph.
1007
1121
"""
1008
1122
no_comments = re .sub ("%.*" , "" , Path (path ).read_text (encoding = "ascii" ))
1009
1123
array = re .search (r"(?s)\[(.*)\]" , no_comments ).group (1 )
@@ -1108,26 +1222,45 @@ def _fontfile(cls, suffix, texname):
1108
1222
from argparse import ArgumentParser
1109
1223
import itertools
1110
1224
1225
+ import fontTools .agl
1226
+
1111
1227
parser = ArgumentParser ()
1112
1228
parser .add_argument ("filename" )
1113
1229
parser .add_argument ("dpi" , nargs = "?" , type = float , default = None )
1114
1230
args = parser .parse_args ()
1115
1231
with Dvi (args .filename , args .dpi ) as dvi :
1116
1232
fontmap = PsfontsMap (find_tex_file ('pdftex.map' ))
1117
1233
for page in dvi :
1118
- print (f"=== new page === "
1234
+ print (f"=== NEW PAGE === "
1119
1235
f"(w: { page .width } , h: { page .height } , d: { page .descent } )" )
1120
- for font , group in itertools .groupby (
1121
- page .text , lambda text : text .font ):
1122
- print (f"font: { font .texname .decode ('latin-1' )!r} \t "
1123
- f"scale: { font ._scale / 2 ** 20 } " )
1124
- print ("x" , "y" , "glyph" , "chr" , "w" , "(glyphs)" , sep = "\t " )
1236
+ print ("--- GLYPHS ---" )
1237
+ for font , group in itertools .groupby (page .text , lambda text : text .font ):
1238
+ font_name = font .texname .decode ("latin-1" )
1239
+ filename = (font_name [1 :].rpartition ("]" )[0 ] if font_name .startswith ("[" )
1240
+ else fontmap [font .texname ].filename ) # rpartition drops any ":key=value" options after the closing "]"
1241
+ if font_name .startswith ("[" ):
1242
+ print (f"font: { font_name } " )
1243
+ else :
1244
+ print (f"font: { font_name } at { filename } " )
1245
+ print (f"scale: { font ._scale / 2 ** 20 } " )
1246
+ print (" " .join (map ("{:>11}" .format , ["x" , "y" , "glyph" , "chr" , "w" ])))
1247
+ face = FT2Font (filename )
1125
1248
for text in group :
1126
- print (text .x , text .y , text .glyph ,
1127
- chr (text .glyph ) if chr (text .glyph ).isprintable ()
1128
- else "." ,
1129
- text .width , sep = "\t " )
1249
+ if font_name .startswith ("[" ):
1250
+ glyph_name = face .get_glyph_name (text .glyph )
1251
+ else :
1252
+ if isinstance (text .glyph_name_or_index , str ):
1253
+ glyph_name = text .glyph_name_or_index
1254
+ else :
1255
+ textpath .TextToPath ._select_native_charmap (face )
1256
+ glyph_name = face .get_glyph_name (
1257
+ face .get_char_index (text .glyph ))
1258
+ glyph_str = fontTools .agl .toUnicode (glyph_name )
1259
+ print (" " .join (map ("{:>11}" .format , [
1260
+ text .x , text .y , text .glyph , glyph_str , text .width ])))
1130
1261
if page .boxes :
1131
- print ("x" , "y" , "h" , "w" , "" , "(boxes)" , sep = "\t " )
1262
+ print ("--- BOXES ---" )
1263
+ print (" " .join (map ("{:>11}" .format , ["x" , "y" , "h" , "w" ])))
1132
1264
for box in page .boxes :
1133
- print (box .x , box .y , box .height , box .width , sep = "\t " )
1265
+ print (" " .join (map ("{:>11}" .format , [
1266
+ box .x , box .y , box .height , box .width ])))
0 commit comments