diff --git a/lib/matplotlib/backends/_backend_pdf_ps.py b/lib/matplotlib/backends/_backend_pdf_ps.py index 1dde801d8665..0ff17a105c20 100644 --- a/lib/matplotlib/backends/_backend_pdf_ps.py +++ b/lib/matplotlib/backends/_backend_pdf_ps.py @@ -22,6 +22,12 @@ from fontTools.ttLib import TTFont +_FONT_MAX_GLYPH = { + 3: 256, + 42: 65536, +} + + @functools.lru_cache(50) def _cached_get_afm_from_fname(fname): with open(fname, "rb") as fh: @@ -103,6 +109,57 @@ def font_as_file(font): return fh +class GlyphMap: + """ + A two-way glyph mapping. + + The forward glyph map is from (character string, glyph index)-pairs to + (subset index, subset character code)-pairs. + + The inverse glyph map is from (subset index, subset character code)-pairs to + (character string, glyph index)-pairs. + """ + + def __init__(self) -> None: + self._forward: dict[tuple[str, GlyphIndexType], + tuple[int, CharacterCodeType]] = {} + self._inverse: dict[tuple[int, CharacterCodeType], + tuple[str, GlyphIndexType]] = {} + + def get(self, charcodes: str, + glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None: + """ + Get the forward mapping from a (character string, glyph index)-pair. + + This may return *None* if the pair is not currently mapped. + """ + return self._forward.get((charcodes, glyph_index)) + + def iget(self, subset: int, + subset_charcode: CharacterCodeType) -> tuple[str, GlyphIndexType]: + """Get the inverse mapping from a (subset, subset charcode)-pair.""" + return self._inverse[(subset, subset_charcode)] + + def add(self, charcode: str, glyph_index: GlyphIndexType, subset: int, + subset_charcode: CharacterCodeType) -> None: + """ + Add a mapping to this instance. + + Parameters + ---------- + charcode : str + The character string to record. + glyph_index : GlyphIndexType + The corresponding glyph index to record. + subset : int + The subset in which the subset character code resides.
+ subset_charcode : CharacterCodeType + The subset character code within the above subset. + """ + self._forward[(charcode, glyph_index)] = (subset, subset_charcode) + self._inverse[(subset, subset_charcode)] = (charcode, glyph_index) + + class CharacterTracker: """ Helper for font subsetting by the PDF and PS backends. @@ -114,16 +171,20 @@ class CharacterTracker: ---------- subset_size : int The size at which characters are grouped into subsets. - used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] + used : dict A dictionary of font files to character maps. - The key is a font filename and subset within that font. + The key is a font filename. - The value is a dictionary mapping a character code to a glyph index. Note this - mapping is the inverse of FreeType, which maps glyph indices to character codes. + The value is a list of dictionaries, each mapping at most *subset_size* + character codes to glyph indices. Note this mapping is the inverse of FreeType, + which maps glyph indices to character codes. If *subset_size* is not set, then there will only be one subset per font filename. + glyph_maps : dict + A dictionary of font files to glyph maps. You probably will want to use the + `.subset_to_unicode` method instead of this attribute. """ def __init__(self, subset_size: int = 0): @@ -134,7 +195,8 @@ def __init__(self, subset_size: int = 0): The maximum size that is supported for an embedded font. If provided, then characters will be grouped into these sized subsets. """ - self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {} + self.used: dict[str, list[dict[CharacterCodeType, GlyphIndexType]]] = {} + self.glyph_maps: dict[str, GlyphMap] = {} self.subset_size = subset_size def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]: @@ -157,24 +219,13 @@ def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]: whole). 
If *subset_size* is not specified, then the subset will always be 0 and the character codes will be returned from the string unchanged. """ - font_glyphs = [] - char_to_font = font._get_fontmap(s) - for _c, _f in char_to_font.items(): - charcode = ord(_c) - glyph_index = _f.get_char_index(charcode) - if self.subset_size != 0: - subset = charcode // self.subset_size - subset_charcode = charcode % self.subset_size - else: - subset = 0 - subset_charcode = charcode - self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index - font_glyphs.append((subset, subset_charcode)) - return font_glyphs - - def track_glyph( - self, font: FT2Font, charcode: CharacterCodeType, - glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]: + return [ + self.track_glyph(f, ord(c), f.get_char_index(ord(c))) + for c, f in font._get_fontmap(s).items() + ] + + def track_glyph(self, font: FT2Font, chars: str | CharacterCodeType, + glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]: """ Record character code *charcode* at glyph index *glyph* as using font *font*. @@ -182,8 +233,10 @@ def track_glyph( ---------- font : FT2Font A font that is being used for the provided string. - charcode : CharacterCodeType - The character code to record. + chars : str or CharacterCodeType + The character(s) to record. This may be a single character code, or multiple + characters in a string, if the glyph maps to several characters. It will be + normalized to a string internally. glyph : GlyphIndexType The corresponding glyph index to record. @@ -196,33 +249,64 @@ def track_glyph( The character code within the above subset. If *subset_size* was not specified on this instance, then this is just *charcode* unmodified. 
""" - if self.subset_size != 0: - subset = charcode // self.subset_size - subset_charcode = charcode % self.subset_size + if isinstance(chars, str): + charcode = ord(chars[0]) + else: + charcode = chars + chars = chr(chars) + + glyph_map = self.glyph_maps.setdefault(font.fname, GlyphMap()) + if result := glyph_map.get(chars, glyph): + return result + + subset_maps = self.used.setdefault(font.fname, [{}]) + use_next_charmap = ( + # Multi-character glyphs always go in the non-0 subset. + len(chars) > 1 or + # Default to preserving the character code as it was. + self.subset_size != 0 + and ( + # But start filling a new subset if outside the first block; this + # preserves ASCII (for Type 3) or the Basic Multilingual Plane (for + # Type 42). + charcode >= self.subset_size + # Or, use a new subset if the character code is already mapped for the + # first block. This means it's using an alternate glyph. + or charcode in subset_maps[0] + ) + ) + if use_next_charmap: + if len(subset_maps) == 1 or len(subset_maps[-1]) == self.subset_size: + subset_maps.append({}) + subset = len(subset_maps) - 1 + subset_charcode = len(subset_maps[-1]) else: subset = 0 subset_charcode = charcode - self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph + subset_maps[subset][subset_charcode] = glyph + glyph_map.add(chars, glyph, subset, subset_charcode) return (subset, subset_charcode) - def subset_to_unicode(self, index: int, - charcode: CharacterCodeType) -> CharacterCodeType: + def subset_to_unicode(self, fontname: str, subset: int, + subset_charcode: CharacterCodeType) -> str: """ Map a subset index and character code to a Unicode character code. Parameters ---------- - index : int + fontname : str + The name of the font, from the *used* dictionary key. + subset : int The subset index within a font. - charcode : CharacterCodeType + subset_charcode : CharacterCodeType The character code within a subset to map back. 
Returns ------- - CharacterCodeType - The Unicode character code corresponding to the subsetted one. + str + The Unicode character(s) corresponding to the subsetted character code. """ - return index * self.subset_size + charcode + return self.glyph_maps[fontname].iget(subset, subset_charcode)[0] class RendererPDFPSBase(RendererBase): diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index 0f7720b1022f..a850f229ab29 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -368,12 +368,6 @@ def pdfRepr(obj): "objects") -_FONT_MAX_GLYPH = { - 3: 256, - 42: 65536, -} - - class Reference: """ PDF reference object. @@ -691,7 +685,7 @@ def __init__(self, filename, metadata=None): self._fontNames = {} # maps filenames to internal font names self._dviFontInfo = {} # maps pdf names to dvifonts self._character_tracker = _backend_pdf_ps.CharacterTracker( - _FONT_MAX_GLYPH.get(mpl.rcParams['pdf.fonttype'], 0)) + _backend_pdf_ps._FONT_MAX_GLYPH.get(mpl.rcParams['pdf.fonttype'], 0)) self.alphaStates = {} # maps alpha values to graphics state objects self._alpha_state_seq = (Name(f'A{i}') for i in itertools.count(1)) @@ -948,9 +942,8 @@ def writeFonts(self): else: # a normal TrueType font _log.debug('Writing TrueType font.') - charmap = self._character_tracker.used.get((filename, subset)) - if charmap: - fonts[Fx] = self.embedTTF(filename, subset, charmap) + charmap = self._character_tracker.used[filename][subset] + fonts[Fx] = self.embedTTF(filename, subset, charmap) self.writeObject(self.fontObject, fonts) def _write_afm_font(self, filename): @@ -992,8 +985,12 @@ def _embedTeXFont(self, dvifont): # Reduce the font to only the glyphs used in the document, get the encoding # for that subset, and compute various properties based on the encoding.
- charmap = self._character_tracker.used[(dvifont.fname, 0)] - chars = frozenset(charmap.keys()) + charmap = self._character_tracker.used[dvifont.fname][0] + chars = { + # DVI type 1 fonts always map single glyph to single character. + ord(self._character_tracker.subset_to_unicode(dvifont.fname, 0, ccode)) + for ccode in charmap + } t1font = t1font.subset(chars, self._get_subset_prefix(charmap.values())) fontdict['BaseFont'] = Name(t1font.prop['FontName']) # createType1Descriptor writes the font data as a side effect @@ -1144,14 +1141,16 @@ def generate_unicode_cmap(subset_index, charmap): unicode_groups[-1][1] = ccode last_ccode = ccode + def _to_unicode(ccode): + chars = self._character_tracker.subset_to_unicode( + filename, subset_index, ccode) + hexstr = chars.encode('utf-16be').hex() + return f'<{hexstr}>' + width = 2 if fonttype == 3 else 4 unicode_bfrange = [] for start, end in unicode_groups: - real_start = self._character_tracker.subset_to_unicode(subset_index, - start) - real_end = self._character_tracker.subset_to_unicode(subset_index, end) - real_values = ' '.join('<%s>' % chr(x).encode('utf-16be').hex() - for x in range(real_start, real_end+1)) + real_values = ' '.join(_to_unicode(x) for x in range(start, end+1)) unicode_bfrange.append( f'<{start:0{width}x}> <{end:0{width}x}> [{real_values}]') unicode_cmap = (self._identityToUnicodeCMap % @@ -2325,7 +2324,7 @@ def output_singlebyte_chunk(kerns_or_chars): for item in _text_helpers.layout(s, font, kern_mode=Kerning.UNFITTED, language=language): subset, charcode = self.file._character_tracker.track_glyph( - item.ft_object, ord(item.char), item.glyph_index) + item.ft_object, item.char, item.glyph_index) if (item.ft_object, subset) != prev_font: if singlebyte_chunk: output_singlebyte_chunk(singlebyte_chunk) diff --git a/lib/matplotlib/backends/backend_ps.py b/lib/matplotlib/backends/backend_ps.py index 14518a38c4ef..374e06da68e9 100644 --- a/lib/matplotlib/backends/backend_ps.py +++ 
b/lib/matplotlib/backends/backend_ps.py @@ -88,7 +88,7 @@ def _move_path_to_path_or_stream(src, dst): shutil.move(src, dst, copy_function=shutil.copyfile) -def _font_to_ps_type3(font_path, glyph_indices): +def _font_to_ps_type3(font_path, subset_index, glyph_indices): """ Subset *glyphs_indices* from the font at *font_path* into a Type 3 font. @@ -96,6 +96,8 @@ def _font_to_ps_type3(font_path, glyph_indices): ---------- font_path : path-like Path to the font to be subsetted. + subset_index : int + The subset of the above font being created. glyph_indices : set[int] The glyphs to include in the subsetted font. @@ -111,7 +113,7 @@ def _font_to_ps_type3(font_path, glyph_indices): %!PS-Adobe-3.0 Resource-Font %%Creator: Converted from TrueType to Type 3 by Matplotlib. 10 dict begin -/FontName /{font_name} def +/FontName /{font_name}-{subset} def /PaintType 0 def /FontMatrix [{inv_units_per_em} 0 0 {inv_units_per_em} 0 0] def /FontBBox [{bbox}] def @@ -119,7 +121,7 @@ def _font_to_ps_type3(font_path, glyph_indices): /Encoding [{encoding}] def /CharStrings {num_glyphs} dict dup begin /.notdef 0 def -""".format(font_name=font.postscript_name, +""".format(font_name=font.postscript_name, subset=subset_index, inv_units_per_em=1 / font.units_per_EM, bbox=" ".join(map(str, font.bbox)), encoding=" ".join(f"/{font.get_glyph_name(glyph_index)}" @@ -168,7 +170,7 @@ def _font_to_ps_type3(font_path, glyph_indices): return preamble + "\n".join(entries) + postamble -def _font_to_ps_type42(font_path, glyph_indices, fh): +def _font_to_ps_type42(font_path, subset_index, glyph_indices, fh): """ Subset *glyph_indices* from the font at *font_path* into a Type 42 font at *fh*. @@ -176,12 +178,14 @@ def _font_to_ps_type42(font_path, glyph_indices, fh): ---------- font_path : path-like Path to the font to be subsetted. + subset_index : int + The subset of the above font being created. glyph_indices : set[int] The glyphs to include in the subsetted font. fh : file-like Where to write the font. 
""" - _log.debug("SUBSET %s characters: %s", font_path, glyph_indices) + _log.debug("SUBSET %s:%d characters: %s", font_path, subset_index, glyph_indices) try: kw = {} # fix this once we support loading more fonts from a collection @@ -192,10 +196,10 @@ def _font_to_ps_type42(font_path, glyph_indices, fh): _backend_pdf_ps.get_glyphs_subset(font_path, glyph_indices) as subset): fontdata = _backend_pdf_ps.font_as_file(subset).getvalue() _log.debug( - "SUBSET %s %d -> %d", font_path, os.stat(font_path).st_size, - len(fontdata) + "SUBSET %s:%d %d -> %d", font_path, subset_index, + os.stat(font_path).st_size, len(fontdata) ) - fh.write(_serialize_type42(font, subset, fontdata)) + fh.write(_serialize_type42(font, subset_index, subset, fontdata)) except RuntimeError: _log.warning( "The PostScript backend does not currently support the selected font (%s).", @@ -203,7 +207,7 @@ def _font_to_ps_type42(font_path, glyph_indices, fh): raise -def _serialize_type42(font, subset, fontdata): +def _serialize_type42(font, subset_index, subset, fontdata): """ Output a PostScript Type-42 format representation of font @@ -211,6 +215,8 @@ def _serialize_type42(font, subset, fontdata): ---------- font : fontTools.ttLib.ttFont.TTFont The original font object + subset_index : int + The subset of the above font to be created. 
subset : fontTools.ttLib.ttFont.TTFont The subset font object fontdata : bytes @@ -231,7 +237,7 @@ def _serialize_type42(font, subset, fontdata): 10 dict begin /FontType 42 def /FontMatrix [1 0 0 1 0 0] def - /FontName /{name.getDebugName(6)} def + /FontName /{name.getDebugName(6)}-{subset_index} def /FontInfo 7 dict dup begin /FullName ({name.getDebugName(4)}) def /FamilyName ({name.getDebugName(1)}) def @@ -425,7 +431,8 @@ def __init__(self, width, height, pswriter, imagedpi=72): self._clip_paths = {} self._path_collection_id = 0 - self._character_tracker = _backend_pdf_ps.CharacterTracker() + self._character_tracker = _backend_pdf_ps.CharacterTracker( + _backend_pdf_ps._FONT_MAX_GLYPH.get(mpl.rcParams['ps.fonttype'], 0)) self._logwarn_once = functools.cache(_log.warning) def _is_transparent(self, rgb_or_rgba): @@ -793,12 +800,16 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None): else: language = mtext.get_language() if mtext is not None else None font = self._get_font_ttf(prop) - self._character_tracker.track(font, s) for item in _text_helpers.layout(s, font, language=language): + # NOTE: We ignore the character code in the subset, because PS uses the + # glyph name to write text. The subset is only used to ensure that each + # one does not overflow format limits. + subset, _ = self._character_tracker.track_glyph( + item.ft_object, item.char, item.glyph_index) ps_name = (item.ft_object.postscript_name .encode("ascii", "replace").decode("ascii")) glyph_name = item.ft_object.get_glyph_name(item.glyph_index) - stream.append((ps_name, item.x, glyph_name)) + stream.append((f'{ps_name}-{subset}', item.x, glyph_name)) self.set_color(*gc.get_rgb()) for ps_name, group in itertools. 
\ @@ -827,11 +838,15 @@ def draw_mathtext(self, gc, x, y, s, prop, angle): f"{angle:g} rotate\n") lastfont = None for font, fontsize, ccode, glyph_index, ox, oy in glyphs: - self._character_tracker.track_glyph(font, ccode, glyph_index) - if (font.postscript_name, fontsize) != lastfont: - lastfont = font.postscript_name, fontsize + # NOTE: We ignore the character code in the subset, because PS uses the + # glyph name to write text. The subset is only used to ensure that each one + # does not overflow format limits. + subset, _ = self._character_tracker.track_glyph( + font, ccode, glyph_index) + if (font.postscript_name, subset, fontsize) != lastfont: + lastfont = font.postscript_name, subset, fontsize self._pswriter.write( - f"/{font.postscript_name} {fontsize} selectfont\n") + f"/{font.postscript_name}-{subset} {fontsize} selectfont\n") glyph_name = font.get_glyph_name(glyph_index) self._pswriter.write( f"{ox:g} {oy:g} moveto\n" @@ -1065,24 +1080,21 @@ def print_figure_impl(fh): Ndict = len(_psDefs) print("%%BeginProlog", file=fh) if not mpl.rcParams['ps.useafm']: - Ndict += len(ps_renderer._character_tracker.used) + Ndict += sum(map(len, ps_renderer._character_tracker.used.values())) print("/mpldict %d dict def" % Ndict, file=fh) print("mpldict begin", file=fh) print("\n".join(_psDefs), file=fh) if not mpl.rcParams['ps.useafm']: - for (font, subset_index), charmap in \ - ps_renderer._character_tracker.used.items(): - if not charmap: - continue - fonttype = mpl.rcParams['ps.fonttype'] - # Can't use more than 255 chars from a single Type 3 font. - if len(charmap) > 255: - fonttype = 42 - fh.flush() - if fonttype == 3: - fh.write(_font_to_ps_type3(font, charmap.values())) - else: # Type 42 only. 
- _font_to_ps_type42(font, charmap.values(), fh) + for font, subsets in ps_renderer._character_tracker.used.items(): + for subset, charmap in enumerate(subsets): + if not charmap: + continue + fonttype = mpl.rcParams['ps.fonttype'] + fh.flush() + if fonttype == 3: + fh.write(_font_to_ps_type3(font, subset, charmap.values())) + else: # Type 42 only. + _font_to_ps_type42(font, subset, charmap.values(), fh) print("end", file=fh) print("%%EndProlog", file=fh)