Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cbb88d4

Browse files
author
Peter Szabo
committed
fixed /Encoding in the output of ConvertType1FontsToType1C; this fixes #50
1 parent cd7b8d6 commit cbb88d4

2 files changed

Lines changed: 87 additions & 6 deletions

File tree

lib/pdfsizeopt/main.py

Lines changed: 84 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5370,8 +5370,34 @@ def GetFonts(self, font_type=None,
53705370
@classmethod
53715371
def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
53725372
pdf_tmp_file_name):
5373+
"""Converts objs with Type1 font programs to Type1C font programs.
5374+
5375+
Uses Ghostscript to do the conversion.
5376+
5377+
Removes the temporary files it creates.
5378+
5379+
As a side effect, the /Encoding field in the returned Type1C font
5380+
programs is useless (it's not the same as in the input Type1 font
5381+
programs). This is a limitation of Ghostscript. As a workaround, the
5382+
correct /Encoding values are returned in `encodings'.
5383+
5384+
Fonts with more than 256 glyphs in their /CharStrings are not converted.
5385+
This is a limitation of Ghostscript.
5386+
5387+
Args:
5388+
objs: dict mapping obj numbers to PdfObj objects containing a Type1
5389+
font program.
5390+
ref_objs: dict containing objs to be used when following references.
5391+
ps_tmp_file_name: Temporary .ps filename to create. Will get removed
5392+
unless an exception is raised.
5393+
pdf_tmp_file_name: Temporary .pdf filename to create. Will get removed
5394+
unless an exception is raised.
5395+
Returns:
5396+
(type1c_objs, encodings), where keys in both type1c_objs and encodings
5397+
are the keys in objs (obj numbers).
5398+
"""
53735399
if not objs:
5374-
return {}
5400+
return {}, {}
53755401
output = ['%!PS-Adobe-3.0\n',
53765402
'% Ghostscript helper for converting Type1 fonts to Type1C\n',
53775403
'%% autogenerated by %s at %s\n' % ('pdfsizeopt', time.time())]
@@ -5425,16 +5451,42 @@ def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
54255451
'info: executing Type1CConverter with Ghostscript: %s' % gs_cmd)
54265452
sys.stdout.flush()
54275453
p = os.popen(gs_cmd, 'rb')
5454+
encoding_prefix = 'obj encoding '
54285455
skip_prefix = 'skipping big-CharStrings font obj '
54295456
big_charstrings_obj_nums = set()
5457+
encodings = {}
54305458
try:
54315459
for line in iter(p.readline, ''):
54325460
if line.startswith(skip_prefix):
54335461
obj_num = int(line[len(skip_prefix):])
54345462
big_charstrings_obj_nums.add(obj_num)
5463+
elif line.startswith(encoding_prefix):
5464+
obj_num, data = line[len(encoding_prefix):].split(' ', 1)
5465+
obj_num = int(obj_num)
5466+
data = data.strip().replace('#', '#23')
5467+
## This escapes e.g. * to #2A.
5468+
data = PdfObj.PDF_HEXTOKENS_SAFE_HEX_ESCAPE_RE.sub(
5469+
lambda match: '#%02X' % ord(match.group()), data)
5470+
encoding = PdfObj.ParseArray(data)
5471+
for i in xrange(len(encoding)):
5472+
char_name = encoding[i]
5473+
if char_name is None:
5474+
encoding[i] = '/.notdef'
5475+
else:
5476+
char_name = str(char_name)
5477+
assert char_name.startswith('/'), [char_name]
5478+
encoding[i] = str(char_name)
5479+
encoding.extend('/.notdef' for i in xrange(len(encoding), 256))
5480+
if len(encoding) > 256:
5481+
raise ValueError('Encoding for obj %d too long.' % obj_num)
5482+
encodings[obj_num] = encoding
54355483
else:
54365484
sys.stdout.write(line)
54375485
finally:
5486+
try:
5487+
p.read()
5488+
except IOError:
5489+
pass
54385490
status = p.close()
54395491
sys.stdout.flush()
54405492
if status:
@@ -5468,16 +5520,18 @@ def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
54685520
for obj_num in type1c_objs:
54695521
# TODO(pts): Also cross-check /FontFile3 with pdf.GetFonts.
54705522
if type1c_objs[obj_num].Get('Subtype') != '/Type1C':
5471-
raise ValueError('Could not convert font %s to Type1C.' % obj_num)
5523+
raise ValueError('Could not convert font obj %d to Type1C.' % obj_num)
54725524
type1c_size += type1c_objs[obj_num].size
5525+
if obj_num not in encodings:
5526+
raise ValueError('Missing encoding for font obj %d.' % obj_num)
54735527
# TODO(pts): Don't remove if command-line flag.
54745528
os.remove(pdf_tmp_file_name)
54755529
# TODO(pts): Undo if no reduction in size.
54765530
print >>sys.stderr, (
54775531
'info: optimized total Type1 font size %s to Type1C font size %s '
54785532
'(%s)' %
54795533
(type1_size, type1c_size, FormatPercent(type1c_size, type1_size)))
5480-
return type1c_objs
5534+
return type1c_objs, encodings
54815535

54825536

54835537
@classmethod
@@ -5621,11 +5675,11 @@ def MoveToPrivate(parsed_font, key):
56215675
def ConvertType1FontsToType1C(self):
56225676
"""Convert all Type1 fonts to Type1C in self, returns self."""
56235677
# GenerateType1CFontsFromType1 removes the tmp files it creates.
5624-
type1c_objs = self.GenerateType1CFontsFromType1(
5678+
type1c_objs, encodings = self.GenerateType1CFontsFromType1(
56255679
self.GetFonts('Type1'), self.objs,
56265680
TMP_PREFIX + 'conv.tmp.ps', TMP_PREFIX + 'conv.tmp.pdf')
56275681
for obj_num in type1c_objs:
5628-
obj = self.objs[obj_num]
5682+
obj = self.objs[obj_num] # obj.Get('Type') == '/FontDescriptor'.
56295683
assert str(obj.Get('FontName')).startswith('/')
56305684
type1c_obj = type1c_objs[obj_num]
56315685
type1c_obj.FixFontNameInType1C(objs=self.objs)
@@ -5646,10 +5700,33 @@ def ConvertType1FontsToType1C(self):
56465700
FormatPercent(new_size, old_size)))
56475701
else:
56485702
# TODO(pts): How to optimize/unify these?
5703+
# TODO(pts): Don't keep, prevents further optimizations.
56495704
print >>sys.stderr, (
56505705
'info: keeping original Type1 font XObject %s,%s, '
56515706
'replacement too large: old size=%s, new size=%s' %
56525707
(obj_num, font_file_obj_num, old_size, new_size))
5708+
encodings.pop(obj_num, None)
5709+
5710+
# Update encodings.
5711+
if encodings:
5712+
for obj_num in sorted(self.objs):
5713+
obj = self.objs[obj_num]
5714+
head = obj.head
5715+
if (head.startswith('<<') and
5716+
'/Font' in head and '/Type' in head and
5717+
'/Type1' in head and '/Subtype' in head and
5718+
'/FontDescriptor' in head and
5719+
obj.Get('Type') == '/Font' and
5720+
obj.Get('Subtype') == '/Type1'):
5721+
match = obj.PDF_REF_AT_EOS_RE.match(str(obj.Get('FontDescriptor')))
5722+
if match:
5723+
fd_obj_num = int(match.group(1)) # /Type/FontDescriptor.
5724+
if (fd_obj_num in encodings and
5725+
self.IsFontBuiltInEncodingUsed(
5726+
obj.ResolveReferences(obj.Get('Encoding'),
5727+
objs=self.objs)[0])):
5728+
obj.Set('Encoding', self.FormatEncoding(encodings[fd_obj_num]))
5729+
56535730
return self
56545731

56555732
@classmethod
@@ -6070,7 +6147,8 @@ def _ProcessType1CFonts(self, type1c_objs, do_unify_fonts,
60706147
for obj_num in sorted(self.objs):
60716148
obj = self.objs[obj_num]
60726149
head = obj.head
6073-
if ('/Font' in head and '/Type' in head and
6150+
if (head.startswith('<<') and
6151+
'/Font' in head and '/Type' in head and
60746152
'/Type1' in head and '/Subtype' in head and
60756153
'/FontDescriptor' in head and
60766154
obj.Get('Type') == '/Font' and

lib/pdfsizeopt/psproc.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,9 @@
345345
% As a workaround for `S1' above, we skip a font with too many
346346
% /CharStrings.
347347
dup /CharStrings get length 256 lt {
348+
(obj encoding ) print _ObjNumber ===only ( ) print
349+
dup /Encoding .knownget not {[]} if ===
350+
348351
% Create /Encoding from sorted keys of /CharStrings.
349352
[1 index /CharStrings get {pop} forall] NameSort
350353
% Pad it to size 256.

0 commit comments

Comments
 (0)