Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ff9ecdd

Browse files
committed
Support {lua,xe}tex as alternative usetex engine.
Currently, this PR is mostly a proof of concept; it only implements the dvi generation and parsing parts, but does not implement rendering in any of the builtin backends, except svg (under rcParams["svg.fonttype"] = "none", the default). However, there is a companion branch on the mplcairo repository, also named "luadvi", which implements support.

Example (requiring both this PR, and mplcairo installed from its luadvi branch):

```
import matplotlib as mpl; mpl.use("module://mplcairo.qt")
from matplotlib import pyplot as plt
plt.rcParams["text.latex.engine"] = "lualatex"  # or "xelatex"
plt.rcParams["text.latex.preamble"] = (
    # {lua,xe}tex can use any font installed on the system, spec'd using its
    # "normal" name. Try e.g. DejaVu Sans instead.
    r"\usepackage{fontspec}\setmainfont{TeX Gyre Pagella}")
plt.figtext(.5, .5, r"\textrm{gff\textwon}", usetex=True)
plt.show()
```

TODO:
- Fix many likely remaining bugs.
- Rework font selection in texmanager, which is currently very ad-hoc due to the limited number of fonts supported by latex.
- Implement rendering support in the (other) builtin backends. In particular, the Agg (and, if we care, cairo) backend will require significant reworking because dvipng, currently used to rasterize dvi to png, doesn't support luatex-generated dvi; instead we will need to proceed as with the other backends, reading the glyphs one at a time from the dvi file and rasterizing them one at a time to the output buffer.

Working on the other backends is not very high on my priority list (as I already have mplcairo as playground...) so it would be nice if others showed some interest for it :-)
1 parent c1dba8e commit ff9ecdd

File tree

6 files changed

+278
-115
lines changed

6 files changed

+278
-115
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def _embedTeXFont(self, fontinfo):
991991

992992
# Widths
993993
widthsObject = self.reserveObject('font widths')
994-
tfm = fontinfo.dvifont._tfm
994+
tfm = fontinfo.dvifont._metrics
995995
# convert from TeX's 12.20 representation to 1/1000 text space units.
996996
widths = [(1000 * tfm.width.get(char, 0)) >> 20
997997
for char in range(max(tfm.width, default=-1) + 1)]

lib/matplotlib/dviread.py

Lines changed: 196 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030

3131
import numpy as np
3232

33-
from matplotlib import _api, cbook
33+
from matplotlib import _api, cbook, textpath
34+
from matplotlib.ft2font import FT2Font, LoadFlags
3435

3536
_log = logging.getLogger(__name__)
3637

@@ -106,18 +107,27 @@ def font_effects(self):
106107
@property
107108
def glyph_name_or_index(self):
    """
    The glyph name, the native charmap glyph index, or the raw glyph index.

    If the font is a TrueType file (which can currently only happen for
    DVI files generated by xetex or luatex), this number is the raw index
    of the glyph, which can be passed to FT_Load_Glyph/load_glyph.

    Otherwise, the font is a PostScript font.  If :file:`pdftex.map`
    specifies an encoding for this glyph's font, that encoding maps glyph
    indices to Adobe glyph names, and this property uses it to convert
    the dvi number to a glyph name.  Callers can then convert the glyph
    name to a glyph index (with FT_Get_Name_Index/get_name_index), and
    load the glyph using FT_Load_Glyph/load_glyph.

    If :file:`pdftex.map` specifies no encoding for a PostScript font,
    this number is an index into the font's "native" charmap; glyphs
    should be loaded directly using FT_Load_Char/load_char after selecting
    the native charmap.
    """
    # TODO: The last section is only true on luatex since luaotfload 3.15;
    # add a version check in the tex file generated by texmanager.
    encoding = self._get_pdftexmap_entry().encoding
    if encoding is None:
        # No encoding vector: hand back the dvi number unchanged.
        return self.glyph
    return _parse_enc(encoding)[self.glyph]
@@ -399,7 +409,7 @@ def _put_char_real(self, char):
399409
scale = font._scale
400410
for x, y, f, g, w in font._vf[char].text:
401411
newf = DviFont(scale=_mul1220(scale, f._scale),
402-
tfm=f._tfm, texname=f.texname, vf=f._vf)
412+
metrics=f._metrics, texname=f.texname, vf=f._vf)
403413
self.text.append(Text(self.h + _mul1220(x, scale),
404414
self.v + _mul1220(y, scale),
405415
newf, g, newf._width_of(g)))
@@ -495,7 +505,27 @@ def _fnt_def(self, k, c, s, d, a, l):
495505

496506
def _fnt_def_real(self, k, c, s, d, a, l):
497507
n = self.file.read(a + l)
498-
fontname = n[-l:].decode('ascii')
508+
fontname = n[-l:].decode("ascii")
509+
# Note that checksum seems wrong?
510+
if fontname.startswith("["):
511+
path, sep, rest = fontname[1:].rpartition("]")
512+
if not sep or rest[:1] not in ["", ":"]:
513+
raise ValueError(f"Invalid modern font name: {fontname}")
514+
flags = {} # TODO: Actually record these flags.
515+
if rest[1:]:
516+
for kv in rest[1:].split(";"):
517+
k, v = kv.split("=", 1)
518+
if k == "index":
519+
if v != 0:
520+
raise NotImplementedError(
521+
"Indexing TTC fonts is not supported yet")
522+
elif k in ["embolden", "slant", "extend"]:
523+
flags[k] = int(v) / 65536
524+
else:
525+
_log.warning("Ignoring invalid key-value pair: %r", kv)
526+
metrics = TtfMetrics(path)
527+
self.fonts[k] = DviFont(scale=s, metrics=metrics, texname=n, vf=None)
528+
return
499529
try:
500530
tfm = _tfmfile(fontname)
501531
except FileNotFoundError as exc:
@@ -512,12 +542,12 @@ def _fnt_def_real(self, k, c, s, d, a, l):
512542
vf = _vffile(fontname)
513543
except FileNotFoundError:
514544
vf = None
515-
self.fonts[k] = DviFont(scale=s, tfm=tfm, texname=n, vf=vf)
545+
self.fonts[k] = DviFont(scale=s, metrics=tfm, texname=n, vf=vf)
516546

517547
@_dispatch(247, state=_dvistate.pre, args=('u1', 'u4', 'u4', 'u4', 'u1'))
518548
def _pre(self, i, num, den, mag, k):
519549
self.file.read(k) # comment in the dvi file
520-
if i != 2:
550+
if i not in [2, 7]: # 2: pdftex, luatex; 7: xetex
521551
raise ValueError(f"Unknown dvi format {i}")
522552
if num != 25400000 or den != 7227 * 2**16:
523553
raise ValueError("Nonstandard units in dvi file")
@@ -538,13 +568,70 @@ def _post(self, _):
538568
# TODO: actually read the postamble and finale?
539569
# currently post_post just triggers closing the file
540570

541-
@_dispatch(249)
542-
def _post_post(self, _):
571+
@_dispatch(249, args=())
572+
def _post_post(self):
573+
raise NotImplementedError
574+
575+
@_dispatch(250, args=())
576+
def _begin_reflect(self):
543577
raise NotImplementedError
544578

545-
@_dispatch(min=250, max=255)
546-
def _malformed(self, offset):
547-
raise ValueError(f"unknown command: byte {250 + offset}")
579+
@_dispatch(251, args=())
580+
def _end_reflect(self):
581+
raise NotImplementedError
582+
583+
@_dispatch(252, args=())
584+
def _define_native_font(self):
    """
    Handler for dvi opcode 252: register a font given by its file path.

    Reads, in order: the font number (4 bytes), the scale (4 bytes), a
    2-byte flags field, a 1-byte path length followed by the path bytes,
    and a 4-byte face index.  Depending on the flags, optional rgba,
    extend, slant and embolden fields follow; they are consumed so that the
    stream stays in sync, but their values are not used yet.

    Raises
    ------
    NotImplementedError
        If the face index is nonzero.
    """
    def read_unsigned(nbytes):
        return self._read_arg(nbytes, signed=False)

    def read_fixed():
        # 4-byte signed value scaled by 2**16.
        return self._read_arg(4, signed=True) / 65536

    font_num = read_unsigned(4)
    scale = read_unsigned(4)
    flags = read_unsigned(2)
    path_len = read_unsigned(1)
    path = self.file.read(path_len)
    face_index = read_unsigned(4)
    # TODO: Actually record these flags.
    if flags & 0x0200:
        rgba = [read_unsigned(1) for _ in range(4)]
    if flags & 0x1000:
        extend = read_fixed()
    if flags & 0x2000:
        slant = read_fixed()
    if flags & 0x4000:
        embolden = read_fixed()
    if face_index:
        raise NotImplementedError("Indexing TTC fonts is not supported yet")
    metrics = TtfMetrics(path)
    # The texname is the path wrapped in brackets.
    texname = b"[" + path + b"]"
    self.fonts[font_num] = DviFont(
        scale=scale, metrics=metrics, texname=texname, vf=None)
605+
606+
@_dispatch(253, args=())
607+
def _set_glyphs(self):
608+
w = self._read_arg(4, signed=False)
609+
k = self._read_arg(2, signed=False)
610+
xy = [self._read_arg(4, signed=True) for _ in range(2 * k)]
611+
g = [self._read_arg(2, signed=False) for _ in range(k)]
612+
font = self.fonts[self.f]
613+
for i in range(k):
614+
self.text.append(Text(self.h + xy[2 * i], self.v + xy[2 * i + 1],
615+
font, g[i], font._width_of(g[i])))
616+
self.h += w
617+
618+
@_dispatch(254, args=())
619+
def _set_text_and_glyphs(self):
620+
l = self._read_arg(2, signed=False)
621+
t = self.file.read(2 * l) # utf16
622+
w = self._read_arg(4, signed=False)
623+
k = self._read_arg(2, signed=False)
624+
xy = [self._read_arg(4, signed=True) for _ in range(2 * k)]
625+
g = [self._read_arg(2, signed=False) for _ in range(k)]
626+
font = self.fonts[self.f]
627+
for i in range(k):
628+
self.text.append(Text(self.h + xy[2 * i], self.v + xy[2 * i + 1],
629+
font, g[i], font._width_of(g[i])))
630+
self.h += w
631+
632+
@_dispatch(255)
633+
def _malformed(self, raw):
634+
raise ValueError("unknown command: byte 255")
548635

549636

550637
class DviFont:
@@ -562,7 +649,7 @@ class DviFont:
562649
----------
563650
scale : float
564651
Factor by which the font is scaled from its natural size.
565-
tfm : Tfm
652+
tfm : Tfm | TtfMetrics
566653
TeX font metrics for this font
567654
texname : bytes
568655
Name of the font as used internally by TeX and friends, as an ASCII
@@ -578,12 +665,12 @@ class DviFont:
578665
Size of the font in Adobe points, converted from the slightly
579666
smaller TeX points.
580667
"""
581-
__slots__ = ('texname', 'size', '_scale', '_vf', '_tfm')
668+
__slots__ = ('texname', 'size', '_scale', '_vf', '_metrics')
582669

583-
def __init__(self, scale, tfm, texname, vf):
670+
def __init__(self, scale, metrics, texname, vf):
584671
_api.check_isinstance(bytes, texname=texname)
585672
self._scale = scale
586-
self._tfm = tfm
673+
self._metrics = metrics
587674
self.texname = texname
588675
self._vf = vf
589676
self.size = scale * (72.0 / (72.27 * 2**16))
@@ -604,32 +691,30 @@ def __repr__(self):
604691

605692
def _width_of(self, char):
    """
    Width of char in dvi units.

    Returns 0 (and logs at debug level) when the font metrics have no entry
    for *char*.
    """
    glyph_metrics = self._metrics.get_metrics(char)
    if glyph_metrics is not None:
        return _mul1220(glyph_metrics.width, self._scale)
    _log.debug('No width for char %d in font %s.', char, self.texname)
    return 0
612699

613700
def _height_depth_of(self, char):
    """
    Height and depth of char in dvi units, as a ``[height, depth]`` list.

    Returns ``[0, 0]`` (and logs at debug level) when the font metrics have
    no entry for *char*.
    """
    glyph_metrics = self._metrics.get_metrics(char)
    if glyph_metrics is None:
        _log.debug('No metrics for char %d in font %s', char, self.texname)
        return [0, 0]
    height = _mul1220(glyph_metrics.height, self._scale)
    depth = _mul1220(glyph_metrics.depth, self._scale)
    # cmsyXX (symbols font) glyph 0 ("minus") has a nonzero descent
    # so that TeX aligns equations properly
    # (https://tex.stackexchange.com/q/526103/)
    # but we actually care about the rasterization depth to align
    # the dvipng-generated images.
    if re.match(br'^cmsy\d+$', self.texname) and char == 0:
        depth = 0
    return [height, depth]
633718

634719

635720
class Vf(Dvi):
@@ -761,6 +846,9 @@ def _mul1220(num1, num2):
761846
return (num1*num2) >> 20
762847

763848

849+
# Per-glyph metrics record (width, height, depth), as returned by the
# ``get_metrics`` methods of the font metrics classes in this module.
WHD = namedtuple('WHD', 'width height depth')
850+
851+
764852
class Tfm:
765853
"""
766854
A TeX Font Metric file.
@@ -783,7 +871,7 @@ class Tfm:
783871
specified in the dvi file. These are dicts because indexing may
784872
not start from 0.
785873
"""
786-
__slots__ = ('checksum', 'design_size', 'width', 'height', 'depth')
874+
__slots__ = ('checksum', 'design_size', '_whds', 'widths')
787875

788876
def __init__(self, filename):
789877
_log.debug('opening tfm file %s', filename)
@@ -799,15 +887,42 @@ def __init__(self, filename):
799887
widths = struct.unpack(f'!{nw}i', file.read(4*nw))
800888
heights = struct.unpack(f'!{nh}i', file.read(4*nh))
801889
depths = struct.unpack(f'!{nd}i', file.read(4*nd))
802-
self.width = {}
803-
self.height = {}
804-
self.depth = {}
890+
self._whds = {}
805891
for idx, char in enumerate(range(bc, ec+1)):
806892
byte0 = char_info[4*idx]
807893
byte1 = char_info[4*idx+1]
808-
self.width[char] = widths[byte0]
809-
self.height[char] = heights[byte1 >> 4]
810-
self.depth[char] = depths[byte1 & 0xf]
894+
self._whds[char] = WHD(
895+
widths[byte0], heights[byte1 >> 4], depths[byte1 & 0xf])
896+
self.widths = [(1000 * self._whds[c].width if c in self._whds else 0) >> 20
897+
for c in range(max(self._whds))] if self._whds else []
898+
899+
def get_metrics(self, char):
    """
    Return the metrics record for *char*, or None if the font has none.

    Parameters
    ----------
    char : int
        Character code, as used to key the per-character table built when
        the tfm file was read.

    Returns
    -------
    WHD or None
        The (width, height, depth) entry for *char*; None when *char* is
        not covered by this font.
    """
    # Callers compare the result against None to fall back to zero
    # metrics, so a missing char must not raise KeyError here.
    return self._whds.get(char)
901+
902+
# Deprecated dict views ({char: value}) of the per-character metrics.
# ``_whds`` maps char -> WHD, so the (char, metrics) pairs must come from
# ``.items()``; iterating the dict itself yields only the keys.
width = _api.deprecated("3.11")(
    property(lambda self: {c: m.width for c, m in self._whds.items()}))
height = _api.deprecated("3.11")(
    property(lambda self: {c: m.height for c, m in self._whds.items()}))
depth = _api.deprecated("3.11")(
    property(lambda self: {c: m.depth for c, m in self._whds.items()}))
908+
909+
910+
class TtfMetrics:
    """
    Glyph metrics read directly from a font file through FT2Font.

    Provides a ``get_metrics`` method returning `WHD` records, like `Tfm`,
    so that either can serve as the metrics source of a font.

    Parameters
    ----------
    filename
        Font file, passed as-is to `FT2Font`.
    """

    def __init__(self, filename):
        self._face = FT2Font(filename, hinting_factor=1)  # Manage closing?

    def get_metrics(self, char):
        """
        Return the `WHD` metrics of the glyph with raw index *char*.

        Values are expressed as fractions of the em scaled by 2**20, i.e.
        the same fixed-point convention consumed by ``_mul1220``.  The
        height is measured from the baseline up to the top of the glyph
        and the depth from the baseline down to its bottom.
        """
        # _mul1220 uses a truncating bitshift for compatibility with dvitype,
        # but I still need to figure out truncation rules when upem is 1000
        # (e.g. lmroman10-regular.otf) and thus the metrics themselves are not
        # exactly representable as 12.20 fp.  For now, just truncate during
        # conversion to 12.20 as well.  (When upem is 2048 the conversion is
        # exact and the truncation does nothing.)
        upem = self._face.units_per_EM  # Usually 2048 or 1000.
        g = self._face.load_glyph(char, LoadFlags.NO_SCALE)
        # horiBearingY is the distance from the baseline to the top of the
        # glyph box, i.e. the TeX-style height; the rest of the box height
        # lies below the baseline and is the depth.
        return WHD(int(g.horiAdvance / upem * 2**20),
                   int(g.horiBearingY / upem * 2**20),
                   int((g.height - g.horiBearingY) / upem * 2**20))
811926

812927

813928
PsFont = namedtuple('PsFont', 'texname psname effects encoding filename')
@@ -1002,8 +1117,7 @@ def _parse_enc(path):
10021117
Returns
10031118
-------
10041119
list
1005-
The nth entry of the list is the PostScript glyph name of the nth
1006-
glyph.
1120+
The nth list item is the PostScript glyph name of the nth glyph.
10071121
"""
10081122
no_comments = re.sub("%.*", "", Path(path).read_text(encoding="ascii"))
10091123
array = re.search(r"(?s)\[(.*)\]", no_comments).group(1)
@@ -1108,26 +1222,45 @@ def _fontfile(cls, suffix, texname):
11081222
from argparse import ArgumentParser
11091223
import itertools
11101224

1225+
import fontTools.agl
1226+
11111227
parser = ArgumentParser()
11121228
parser.add_argument("filename")
11131229
parser.add_argument("dpi", nargs="?", type=float, default=None)
11141230
args = parser.parse_args()
11151231
with Dvi(args.filename, args.dpi) as dvi:
11161232
fontmap = PsfontsMap(find_tex_file('pdftex.map'))
11171233
for page in dvi:
1118-
print(f"=== new page === "
1234+
print(f"=== NEW PAGE === "
11191235
f"(w: {page.width}, h: {page.height}, d: {page.descent})")
1120-
for font, group in itertools.groupby(
1121-
page.text, lambda text: text.font):
1122-
print(f"font: {font.texname.decode('latin-1')!r}\t"
1123-
f"scale: {font._scale / 2 ** 20}")
1124-
print("x", "y", "glyph", "chr", "w", "(glyphs)", sep="\t")
1236+
print("--- GLYPHS ---")
1237+
for font, group in itertools.groupby(page.text, lambda text: text.font):
1238+
font_name = font.texname.decode("latin-1")
1239+
filename = (font_name[1:-1] if font_name.startswith("[")
1240+
else fontmap[font.texname].filename)
1241+
if font_name.startswith("["):
1242+
print(f"font: {font_name}")
1243+
else:
1244+
print(f"font: {font_name} at {filename}")
1245+
print(f"scale: {font._scale / 2 ** 20}")
1246+
print(" ".join(map("{:>11}".format, ["x", "y", "glyph", "chr", "w"])))
1247+
face = FT2Font(filename)
11251248
for text in group:
1126-
print(text.x, text.y, text.glyph,
1127-
chr(text.glyph) if chr(text.glyph).isprintable()
1128-
else ".",
1129-
text.width, sep="\t")
1249+
if font_name.startswith("["):
1250+
glyph_name = face.get_glyph_name(text.glyph)
1251+
else:
1252+
if isinstance(text.glyph_name_or_index, str):
1253+
glyph_name = text.glyph_name_or_index
1254+
else:
1255+
textpath.TextToPath._select_native_charmap(face)
1256+
glyph_name = face.get_glyph_name(
1257+
face.get_char_index(text.glyph))
1258+
glyph_str = fontTools.agl.toUnicode(glyph_name)
1259+
print(" ".join(map("{:>11}".format, [
1260+
text.x, text.y, text.glyph, glyph_str, text.width])))
11301261
if page.boxes:
1131-
print("x", "y", "h", "w", "", "(boxes)", sep="\t")
1262+
print("--- BOXES ---")
1263+
print(" ".join(map("{:>11}".format, ["x", "y", "h", "w"])))
11321264
for box in page.boxes:
1133-
print(box.x, box.y, box.height, box.width, sep="\t")
1265+
print(" ".join(map("{:>11}".format, [
1266+
box.x, box.y, box.height, box.width])))

0 commit comments

Comments
 (0)