Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Proof of concept: Type42 subsetting in pdf #18143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 44 additions & 12 deletions lib/matplotlib/backends/backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
import os
import re
import struct
import tempfile
import time
import types
import warnings
import zlib

from fontTools import subset
import numpy as np
from PIL import Image

Expand All @@ -36,7 +38,7 @@
import matplotlib.type1font as type1font
import matplotlib.dviread as dviread
from matplotlib.ft2font import (FIXED_WIDTH, ITALIC, LOAD_NO_SCALE,
LOAD_NO_HINTING, KERNING_UNFITTED)
LOAD_NO_HINTING, KERNING_UNFITTED, FT2Font)
from matplotlib.mathtext import MathTextParser
from matplotlib.transforms import Affine2D, BboxBase
from matplotlib.path import Path
Expand Down Expand Up @@ -1146,6 +1148,23 @@ def embedTTFType42(font, characters, descriptor):
wObject = self.reserveObject('Type 0 widths')
toUnicodeMapObject = self.reserveObject('ToUnicode map')

print(f"SUBSET {filename} characters: "
f"{''.join(chr(c) for c in characters)}")
fontdata = self.getSubset(
filename,
''.join(chr(c) for c in characters)
)
print(f'SUBSET {filename} {os.stat(filename).st_size}'
f' ↦ {len(fontdata)}')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should obviously be log calls at the debug level.


# reload the font object from the subset
# (all the necessary data could probably be obtained directly
# using fontTools.ttLib)
with tempfile.NamedTemporaryFile(suffix='.ttf') as tmp:
tmp.write(fontdata)
tmp.seek(0, 0)
font = FT2Font(tmp.name)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reloading the FT2Font object is a bit ugly, and I think it is only needed here to get the glyph widths, the cid to gid map and the unicode mapping. These could probably be obtained otherwise. On the other hand, reusing the old code makes this patch smaller.


cidFontDict = {
'Type': Name('Font'),
'Subtype': Name('CIDFontType2'),
Expand All @@ -1170,21 +1189,12 @@ def embedTTFType42(font, characters, descriptor):

# Make fontfile stream
descriptor['FontFile2'] = fontfileObject
length1Object = self.reserveObject('decoded length of a font')
self.beginStream(
fontfileObject.id,
self.reserveObject('length of font stream'),
{'Length1': length1Object})
with open(filename, 'rb') as fontfile:
length1 = 0
while True:
data = fontfile.read(4096)
if not data:
break
length1 += len(data)
self.currentstream.write(data)
{'Length1': len(fontdata)})
self.currentstream.write(fontdata)
self.endStream()
self.writeObject(length1Object, length1)

# Make the 'W' (Widths) array, CidToGidMap and ToUnicode CMap
# at the same time
Expand Down Expand Up @@ -1307,6 +1317,28 @@ def embedTTFType42(font, characters, descriptor):
elif fonttype == 42:
return embedTTFType42(font, characters, descriptor)

@classmethod
def getSubset(cls, fontfile, characters):
    """
    Subset a TTF font.

    Reads the named fontfile and restricts the font to the characters.
    Returns a serialization of the subset font as bytes.

    Parameters
    ----------
    fontfile : str
        Name of the font file; handed to ``subset.load_font``.
    characters : str
        Text whose characters the subset font must still cover; handed
        to ``Subsetter.populate`` as its ``text`` argument.

    Returns
    -------
    bytes
        The subset font, as written by ``font.save``.
    """
    options = subset.Options(glyph_names=True, recommended_glyphs=True)
    # Extend (rather than replace) the list of tables the subsetter drops.
    options.drop_tables += ['FFTM']
    font = subset.load_font(fontfile, options)
    try:
        subsetter = subset.Subsetter(options=options)
        subsetter.populate(text=characters)
        subsetter.subset(font)
        # Serialize into memory; the caller embeds the bytes directly.
        with BytesIO() as fh:
            font.save(fh, reorderTables=False)
            return fh.getvalue()
    finally:
        # Always release the font, even if subsetting or saving fails.
        font.close()

def alphaState(self, alpha):
"""Return name of an ExtGState that sets alpha to the given value."""

Expand Down
2 changes: 2 additions & 0 deletions lib/matplotlib/testing/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def pytest_configure(config):
("markers", "baseline_images: Compare output against references."),
("markers", "pytz: Tests that require pytz to be installed."),
("filterwarnings", "error"),
("filterwarnings",
"ignore:.*The py23 module has been deprecated:DeprecationWarning"),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably not needed any more; see fonttools/fonttools#2035.

]:
config.addinivalue_line(key, value)

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def build_extensions(self):
],
install_requires=[
"cycler>=0.10",
"fonttools>=4.13.0,<5.0",
"kiwisolver>=1.0.1",
"numpy>=1.16",
"pillow>=6.2.0",
Expand Down