-
-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Proof of concept: Type42 subsetting in pdf #18143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,11 +15,13 @@ | |
import os | ||
import re | ||
import struct | ||
import tempfile | ||
import time | ||
import types | ||
import warnings | ||
import zlib | ||
|
||
from fontTools import subset | ||
import numpy as np | ||
from PIL import Image | ||
|
||
|
@@ -36,7 +38,7 @@ | |
import matplotlib.type1font as type1font | ||
import matplotlib.dviread as dviread | ||
from matplotlib.ft2font import (FIXED_WIDTH, ITALIC, LOAD_NO_SCALE, | ||
LOAD_NO_HINTING, KERNING_UNFITTED) | ||
LOAD_NO_HINTING, KERNING_UNFITTED, FT2Font) | ||
from matplotlib.mathtext import MathTextParser | ||
from matplotlib.transforms import Affine2D, BboxBase | ||
from matplotlib.path import Path | ||
|
@@ -1146,6 +1148,23 @@ def embedTTFType42(font, characters, descriptor): | |
wObject = self.reserveObject('Type 0 widths') | ||
toUnicodeMapObject = self.reserveObject('ToUnicode map') | ||
|
||
print(f"SUBSET {filename} characters: " | ||
f"{''.join(chr(c) for c in characters)}") | ||
fontdata = self.getSubset( | ||
filename, | ||
''.join(chr(c) for c in characters) | ||
) | ||
print(f'SUBSET {filename} {os.stat(filename).st_size}' | ||
f' ↦ {len(fontdata)}') | ||
|
||
# reload the font object from the subset | ||
# (all the necessary data could probably be obtained directly | ||
# using fontTools.ttLib) | ||
with tempfile.NamedTemporaryFile(suffix='.ttf') as tmp: | ||
tmp.write(fontdata) | ||
tmp.seek(0, 0) | ||
font = FT2Font(tmp.name) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reloading the FT2Font object is a bit ugly, and I think it is only needed here to get the glyph widths, the CID-to-GID map and the Unicode mapping. These could probably be obtained otherwise. On the other hand, reusing the old code makes this patch smaller. |
||
|
||
cidFontDict = { | ||
'Type': Name('Font'), | ||
'Subtype': Name('CIDFontType2'), | ||
|
@@ -1170,21 +1189,12 @@ def embedTTFType42(font, characters, descriptor): | |
|
||
# Make fontfile stream | ||
descriptor['FontFile2'] = fontfileObject | ||
length1Object = self.reserveObject('decoded length of a font') | ||
self.beginStream( | ||
fontfileObject.id, | ||
self.reserveObject('length of font stream'), | ||
{'Length1': length1Object}) | ||
with open(filename, 'rb') as fontfile: | ||
length1 = 0 | ||
while True: | ||
data = fontfile.read(4096) | ||
if not data: | ||
break | ||
length1 += len(data) | ||
self.currentstream.write(data) | ||
{'Length1': len(fontdata)}) | ||
self.currentstream.write(fontdata) | ||
self.endStream() | ||
self.writeObject(length1Object, length1) | ||
|
||
# Make the 'W' (Widths) array, CidToGidMap and ToUnicode CMap | ||
# at the same time | ||
|
@@ -1307,6 +1317,28 @@ def embedTTFType42(font, characters, descriptor): | |
elif fonttype == 42: | ||
return embedTTFType42(font, characters, descriptor) | ||
|
||
@classmethod
def getSubset(cls, fontfile, characters):
    """
    Subset a TTF font.

    Reads the named fontfile and restricts the font to the characters.
    Returns a serialization of the subset font as bytes.

    Parameters
    ----------
    fontfile
        The font file to read; handed unchanged to
        ``fontTools.subset.load_font`` (presumably a path -- confirm
        against the caller).
    characters : str
        The text whose characters must remain available in the subset.

    Returns
    -------
    bytes
        The serialized subset font.
    """
    # The same options object configures both loading and subsetting.
    options = subset.Options(glyph_names=True, recommended_glyphs=True)
    # Drop the 'FFTM' table in addition to the tables dropped by default.
    options.drop_tables += ['FFTM']
    font = subset.load_font(fontfile, options)
    try:
        subsetter = subset.Subsetter(options=options)
        subsetter.populate(text=characters)
        subsetter.subset(font)
        # Serialize into memory instead of writing a file.
        fh = BytesIO()
        font.save(fh, reorderTables=False)
        return fh.getvalue()
    finally:
        # Always release the loaded font, even if subsetting fails.
        font.close()
|
||
def alphaState(self, alpha): | ||
"""Return name of an ExtGState that sets alpha to the given value.""" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,8 @@ def pytest_configure(config): | |
("markers", "baseline_images: Compare output against references."), | ||
("markers", "pytz: Tests that require pytz to be installed."), | ||
("filterwarnings", "error"), | ||
("filterwarnings", | ||
"ignore:.*The py23 module has been deprecated:DeprecationWarning"), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is probably not needed any more: see fonttools/fonttools#2035 |
||
]: | ||
config.addinivalue_line(key, value) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These should obviously be log calls at the debug level.