diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 64508593e238..28398cd1973e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -145,8 +145,8 @@ jobs: # Install dependencies from PyPI. python -m pip install --upgrade $PRE \ - cycler kiwisolver numpy packaging pillow pyparsing python-dateutil \ - setuptools-scm \ + cycler fonttools kiwisolver numpy packaging pillow pyparsing \ + python-dateutil setuptools-scm \ -r requirements/testing/all.txt \ ${{ matrix.extra-requirements }} diff --git a/doc/api/next_api_changes/development/20391-AG.rst b/doc/api/next_api_changes/development/20391-AG.rst new file mode 100644 index 000000000000..37cc539c5ad2 --- /dev/null +++ b/doc/api/next_api_changes/development/20391-AG.rst @@ -0,0 +1,8 @@ +fontTools for type 42 subsetting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A new dependency known as `fontTools `_ +has been integrated into Matplotlib 3.5. + +It is designed to be used with PS/EPS and PDF documents, and handles +Type 42 font subsetting. diff --git a/doc/devel/dependencies.rst b/doc/devel/dependencies.rst index dff7a9b463b9..62453e5c18c9 100644 --- a/doc/devel/dependencies.rst +++ b/doc/devel/dependencies.rst @@ -22,6 +22,7 @@ reference. * `kiwisolver `_ (>= 1.0.1) * `Pillow `_ (>= 6.2) * `pyparsing `_ (>=2.2.1) +* `fontTools `_ (>=4.22.0) .. _optional_dependencies: diff --git a/doc/users/next_whats_new/subsetting.rst b/doc/users/next_whats_new/subsetting.rst new file mode 100644 index 000000000000..89c33f58d371 --- /dev/null +++ b/doc/users/next_whats_new/subsetting.rst @@ -0,0 +1,22 @@ +Type 42 Subsetting is now enabled for PDF/PS backends +----------------------------------------------------- + +`~matplotlib.backends.backend_pdf` and `~matplotlib.backends.backend_ps` now use +a unified Type 42 font subsetting interface, with the help of `fontTools `_. + +Set `~matplotlib.RcParams`'s *fonttype* value as ``42`` to trigger this workflow: + +.. 
def get_glyphs_subset(fontfile, characters):
    """
    Subset a TTF font.

    Reads the named fontfile and restricts the font to the characters.

    Parameters
    ----------
    fontfile : str
        Path to the font file.
    characters : str
        Continuous set of characters to include in the subset.

    Returns
    -------
    io.BytesIO
        In-memory serialization of the subset font, positioned at its
        start so that it can be read directly as a file-like object.
    """
    options = subset.Options(glyph_names=True, recommended_glyphs=True)

    # Drop the FontForge timestamp ('FFTM') and 'PfEd' tables rather than
    # subsetting them.
    options.drop_tables += ['FFTM', 'PfEd']

    with subset.load_font(fontfile, options) as font:
        subsetter = subset.Subsetter(options=options)
        subsetter.populate(text=characters)
        subsetter.subset(font)
        fh = BytesIO()
        font.save(fh, reorderTables=False)

    # Saving leaves the stream positioned at its end; rewind so that a plain
    # ``read()`` by a consumer returns the font data instead of ``b""``.
    # ``getvalue()`` and ``getbuffer()`` are unaffected by the position.
    fh.seek(0)
    return fh
diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index ec6154234e5d..89afe92ce913 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -14,7 +14,9 @@ import math import os import re +import string import struct +import sys import time import types import warnings @@ -36,7 +38,7 @@ import matplotlib.type1font as type1font import matplotlib.dviread as dviread from matplotlib.ft2font import (FIXED_WIDTH, ITALIC, LOAD_NO_SCALE, - LOAD_NO_HINTING, KERNING_UNFITTED) + LOAD_NO_HINTING, KERNING_UNFITTED, FT2Font) from matplotlib.mathtext import MathTextParser from matplotlib.transforms import Affine2D, BboxBase from matplotlib.path import Path @@ -768,6 +770,22 @@ def newTextnote(self, text, positionRect=[-100, -100, 0, 0]): } self.pageAnnotations.append(theNote) + def _get_subsetted_psname(self, ps_name, charmap): + def toStr(n, base): + if n < base: + return string.ascii_uppercase[n] + else: + return ( + toStr(n // base, base) + string.ascii_uppercase[n % base] + ) + + # encode to string using base 26 + hashed = hash(frozenset(charmap.keys())) % ((sys.maxsize + 1) * 2) + prefix = toStr(hashed, 26) + + # get first 6 characters from prefix + return prefix[:6] + "+" + ps_name + def finalize(self): """Write out the various deferred objects and the pdf end matter.""" @@ -1209,6 +1227,26 @@ def embedTTFType42(font, characters, descriptor): wObject = self.reserveObject('Type 0 widths') toUnicodeMapObject = self.reserveObject('ToUnicode map') + _log.debug( + "SUBSET %s characters: %s", + filename, "".join(chr(c) for c in characters) + ) + fontdata = _backend_pdf_ps.get_glyphs_subset( + filename, "".join(chr(c) for c in characters) + ) + _log.debug( + "SUBSET %s %d -> %d", filename, + os.stat(filename).st_size, fontdata.getbuffer().nbytes + ) + + # We need this ref for XObjects + full_font = font + + # reload the font object from the subset + # (all the necessary data could probably be 
obtained directly + # using fontLib.ttLib) + font = FT2Font(fontdata) + cidFontDict = { 'Type': Name('Font'), 'Subtype': Name('CIDFontType2'), @@ -1233,21 +1271,12 @@ def embedTTFType42(font, characters, descriptor): # Make fontfile stream descriptor['FontFile2'] = fontfileObject - length1Object = self.reserveObject('decoded length of a font') self.beginStream( fontfileObject.id, self.reserveObject('length of font stream'), - {'Length1': length1Object}) - with open(filename, 'rb') as fontfile: - length1 = 0 - while True: - data = fontfile.read(4096) - if not data: - break - length1 += len(data) - self.currentstream.write(data) + {'Length1': fontdata.getbuffer().nbytes}) + self.currentstream.write(fontdata.getvalue()) self.endStream() - self.writeObject(length1Object, length1) # Make the 'W' (Widths) array, CidToGidMap and ToUnicode CMap # at the same time @@ -1299,10 +1328,10 @@ def embedTTFType42(font, characters, descriptor): glyph_ids = [] for ccode in characters: if not _font_supports_char(fonttype, chr(ccode)): - gind = font.get_char_index(ccode) + gind = full_font.get_char_index(ccode) glyph_ids.append(gind) - bbox = [cvt(x, nearest=False) for x in font.bbox] + bbox = [cvt(x, nearest=False) for x in full_font.bbox] rawcharprocs = _get_pdf_charprocs(filename, glyph_ids) for charname in sorted(rawcharprocs): stream = rawcharprocs[charname] @@ -1352,7 +1381,11 @@ def embedTTFType42(font, characters, descriptor): # Beginning of main embedTTF function... 
- ps_name = font.postscript_name.encode('ascii', 'replace') + ps_name = self._get_subsetted_psname( + font.postscript_name, + font.get_charmap() + ) + ps_name = ps_name.encode('ascii', 'replace') ps_name = Name(ps_name) pclt = font.get_sfnt_table('pclt') or {'capHeight': 0, 'xHeight': 0} post = font.get_sfnt_table('post') or {'italicAngle': (0, 0)} diff --git a/lib/matplotlib/backends/backend_ps.py b/lib/matplotlib/backends/backend_ps.py index ef98eba4c38a..7cdab6c6a053 100644 --- a/lib/matplotlib/backends/backend_ps.py +++ b/lib/matplotlib/backends/backend_ps.py @@ -7,11 +7,12 @@ from enum import Enum import functools import glob -from io import StringIO +from io import StringIO, TextIOWrapper import logging import math import os import pathlib +import tempfile import re import shutil from tempfile import TemporaryDirectory @@ -27,7 +28,7 @@ GraphicsContextBase, RendererBase) from matplotlib.cbook import is_writable_file_like, file_requires_unicode from matplotlib.font_manager import get_font -from matplotlib.ft2font import LOAD_NO_HINTING, LOAD_NO_SCALE +from matplotlib.ft2font import LOAD_NO_HINTING, LOAD_NO_SCALE, FT2Font from matplotlib._ttconv import convert_ttf_to_ps from matplotlib.mathtext import MathTextParser from matplotlib._mathtext_data import uni2type1 @@ -954,8 +955,40 @@ def print_figure_impl(fh): fh.write(_font_to_ps_type3(font_path, glyph_ids)) else: try: - convert_ttf_to_ps(os.fsencode(font_path), - fh, fonttype, glyph_ids) + _log.debug( + "SUBSET %s characters: %s", font_path, + ''.join(chr(c) for c in chars) + ) + fontdata = _backend_pdf_ps.get_glyphs_subset( + font_path, "".join(chr(c) for c in chars) + ) + _log.debug( + "SUBSET %s %d -> %d", font_path, + os.stat(font_path).st_size, + fontdata.getbuffer().nbytes + ) + + # give ttconv a subsetted font + # along with updated glyph_ids + with TemporaryDirectory() as tmpdir: + tmpfile = os.path.join(tmpdir, "tmp.ttf") + font = FT2Font(fontdata) + glyph_ids = [ + font.get_char_index(c) for c in 
chars + ] + + with open(tmpfile, 'wb') as tmp: + tmp.write(fontdata.getvalue()) + tmp.flush() + + # TODO: allow convert_ttf_to_ps + # to input file objects (BytesIO) + convert_ttf_to_ps( + os.fsencode(tmpfile), + fh, + fonttype, + glyph_ids, + ) except RuntimeError: _log.warning( "The PostScript backend does not currently " diff --git a/lib/matplotlib/testing/conftest.py b/lib/matplotlib/testing/conftest.py index f35eddf96b00..d0aa85367529 100644 --- a/lib/matplotlib/testing/conftest.py +++ b/lib/matplotlib/testing/conftest.py @@ -19,6 +19,8 @@ def pytest_configure(config): ("markers", "pytz: Tests that require pytz to be installed."), ("markers", "network: Tests that reach out to the network."), ("filterwarnings", "error"), + ("filterwarnings", + "ignore:.*The py23 module has been deprecated:DeprecationWarning"), ]: config.addinivalue_line(key, value) diff --git a/lib/matplotlib/tests/test_backend_pdf.py b/lib/matplotlib/tests/test_backend_pdf.py index 384b88250c7f..8e16eb2b7b94 100644 --- a/lib/matplotlib/tests/test_backend_pdf.py +++ b/lib/matplotlib/tests/test_backend_pdf.py @@ -10,7 +10,11 @@ import matplotlib as mpl from matplotlib import dviread, pyplot as plt, checkdep_usetex, rcParams +from matplotlib.cbook import _get_data_path +from matplotlib.ft2font import FT2Font +from matplotlib.backends._backend_pdf_ps import get_glyphs_subset from matplotlib.backends.backend_pdf import PdfPages + from matplotlib.testing.decorators import check_figures_equal, image_comparison @@ -339,3 +343,28 @@ def test_kerning(): s = "AVAVAVAVAVAVAVAV€AAVV" fig.text(0, .25, s, size=5) fig.text(0, .75, s, size=20) + + +def test_glyphs_subset(): + fpath = str(_get_data_path("fonts/ttf/DejaVuSerif.ttf")) + chars = "these should be subsetted! 
@pytest.mark.parametrize('fonttype', ["3", "42"])
def test_fonttype(fonttype):
    """Check that PS output declares the font type set in ``ps.fonttype``."""
    mpl.rcParams["ps.fonttype"] = fonttype

    fig, ax = plt.subplots()
    ax.text(0.25, 0.5, "Forty-two is the answer to everything!")

    # Render the figure to an in-memory PostScript document.
    ps_output = io.BytesIO()
    fig.savefig(ps_output, format="ps")

    # The emitted font dictionary must contain "/FontType <n> def".
    pattern = re.compile(
        f"/FontType {fonttype} def".encode('utf-8'), re.MULTILINE)
    assert pattern.search(ps_output.getvalue())
b/src/_ttconv.cpp index daa9f2639e55..635f7c7bcfde 100644 --- a/src/_ttconv.cpp +++ b/src/_ttconv.cpp @@ -164,7 +164,7 @@ static PyMethodDef ttconv_methods[] = "font data will be written to.\n" "fonttype may be either 3 or 42. Type 3 is a \"raw Postscript\" font. " "Type 42 is an embedded Truetype font. Glyph subsetting is not supported " - "for Type 42 fonts.\n" + "for Type 42 fonts within this module (needs to be done externally).\n" "glyph_ids (optional) is a list of glyph ids (integers) to keep when " "subsetting to a Type 3 font. If glyph_ids is not provided or is None, " "then all glyphs will be included. If any of the glyphs specified are "