@@ -5370,8 +5370,34 @@ def GetFonts(self, font_type=None,
53705370 @classmethod
53715371 def GenerateType1CFontsFromType1 (cls , objs , ref_objs , ps_tmp_file_name ,
53725372 pdf_tmp_file_name ):
5373+ """Converts objs with Type1 font programs to Type1C font programs.
5374+
5375+ Uses Ghostscript to do the conversion.
5376+
5377+ Removes the temporary files it creates.
5378+
5379+ As a side effect, the /Encoding field in the returned Type1C font
5380+ programs is useless (it's not the same as in the input Type1 font
5381+ programs). This is a limitation of Ghostscript. As a workaround, the
5382+ correct /Encoding values are returned in `encodings'.
5383+
5384+ Fonts with more than 256 glyphs in their /CharStrings are not converted.
5385+ This is a limitation of Ghostscript.
5386+
5387+ Args:
5388+ objs: dict mapping obj numbers to PdfObj objects containing a Type1
5389+ font program.
5390+ ref_objs: dict containing objs to be used when following references.
5391+ ps_tmp_file_name: Temporary .ps filename to create. Will get removed
5392+ unless an exception is raised.
5393+ pdf_tmp_file_name: Temporary .pdf filename to create. Will get removed
5394+ unless an exception is raised.
5395+ Returns:
5396+ (type1c_objs, encodings), where keys in both type1c_objs and encodings
5397+ are the keys in objs (obj numbers).
5398+ """
53735399 if not objs :
5374- return {}
5400+ return {}, {}
53755401 output = ['%!PS-Adobe-3.0\n ' ,
53765402 '% Ghostscript helper for converting Type1 fonts to Type1C\n ' ,
53775403 '%% autogenerated by %s at %s\n ' % ('pdfsizeopt' , time .time ())]
@@ -5425,16 +5451,42 @@ def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
54255451 'info: executing Type1CConverter with Ghostscript: %s' % gs_cmd )
54265452 sys .stdout .flush ()
54275453 p = os .popen (gs_cmd , 'rb' )
5454+ encoding_prefix = 'obj encoding '
54285455 skip_prefix = 'skipping big-CharStrings font obj '
54295456 big_charstrings_obj_nums = set ()
5457+ encodings = {}
54305458 try :
54315459 for line in iter (p .readline , '' ):
54325460 if line .startswith (skip_prefix ):
54335461 obj_num = int (line [len (skip_prefix ):])
54345462 big_charstrings_obj_nums .add (obj_num )
5463+ elif line .startswith (encoding_prefix ):
5464+ obj_num , data = line [len (encoding_prefix ):].split (' ' , 1 )
5465+ obj_num = int (obj_num )
5466+ data = data .strip ().replace ('#' , '#23' )
5467+ ## This escapes e.g. * to #2A.
5468+ data = PdfObj .PDF_HEXTOKENS_SAFE_HEX_ESCAPE_RE .sub (
5469+ lambda match : '#%02X' % ord (match .group ()), data )
5470+ encoding = PdfObj .ParseArray (data )
5471+ for i in xrange (len (encoding )):
5472+ char_name = encoding [i ]
5473+ if char_name is None :
5474+ encoding [i ] = '/.notdef'
5475+ else :
5476+ char_name = str (char_name )
5477+ assert char_name .startswith ('/' ), [char_name ]
5478+ encoding [i ] = str (char_name )
5479+ encoding .extend ('/.notdef' for i in xrange (len (encoding ), 256 ))
5480+ if len (encoding ) > 256 :
5481+ raise ValueError ('Encoding for obj %d too long.' % obj_num )
5482+ encodings [obj_num ] = encoding
54355483 else :
54365484 sys .stdout .write (line )
54375485 finally :
5486+ try :
5487+ p .read ()
5488+ except IOError :
5489+ pass
54385490 status = p .close ()
54395491 sys .stdout .flush ()
54405492 if status :
@@ -5468,16 +5520,18 @@ def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
54685520 for obj_num in type1c_objs :
54695521 # TODO(pts): Also cross-check /FontFile3 with pdf.GetFonts.
54705522 if type1c_objs [obj_num ].Get ('Subtype' ) != '/Type1C' :
5471- raise ValueError ('Could not convert font %s to Type1C.' % obj_num )
5523+ raise ValueError ('Could not convert font obj %d to Type1C.' % obj_num )
54725524 type1c_size += type1c_objs [obj_num ].size
5525+ if obj_num not in encodings :
5526+ raise ValueError ('Missing encoding for font obj %d.' % obj_num )
54735527 # TODO(pts): Don't remove if command-line flag.
54745528 os .remove (pdf_tmp_file_name )
54755529 # TODO(pts): Undo if no reduction in size.
54765530 print >> sys .stderr , (
54775531 'info: optimized total Type1 font size %s to Type1C font size %s '
54785532 '(%s)' %
54795533 (type1_size , type1c_size , FormatPercent (type1c_size , type1_size )))
5480- return type1c_objs
5534+ return type1c_objs , encodings
54815535
54825536
54835537 @classmethod
@@ -5621,11 +5675,11 @@ def MoveToPrivate(parsed_font, key):
56215675 def ConvertType1FontsToType1C (self ):
56225676 """Convert all Type1 fonts to Type1C in self, returns self."""
56235677 # GenerateType1CFontsFromType1 removes the tmp files it creates.
5624- type1c_objs = self .GenerateType1CFontsFromType1 (
5678+ type1c_objs , encodings = self .GenerateType1CFontsFromType1 (
56255679 self .GetFonts ('Type1' ), self .objs ,
56265680 TMP_PREFIX + 'conv.tmp.ps' , TMP_PREFIX + 'conv.tmp.pdf' )
56275681 for obj_num in type1c_objs :
5628- obj = self .objs [obj_num ]
5682+ obj = self .objs [obj_num ] # obj.get('Type') == 'FontDescriptor'.
56295683 assert str (obj .Get ('FontName' )).startswith ('/' )
56305684 type1c_obj = type1c_objs [obj_num ]
56315685 type1c_obj .FixFontNameInType1C (objs = self .objs )
@@ -5646,10 +5700,33 @@ def ConvertType1FontsToType1C(self):
56465700 FormatPercent (new_size , old_size )))
56475701 else :
56485702 # TODO(pts): How to optimize/unify these?
5703+ # TODO(pts): Don't keep, prevents further optimizations.
56495704 print >> sys .stderr , (
56505705 'info: keeping original Type1 font XObject %s,%s, '
56515706 'replacement too large: old size=%s, new size=%s' %
56525707 (obj_num , font_file_obj_num , old_size , new_size ))
5708+ encodings .pop (obj_num , None )
5709+
5710+ # Update encodings.
5711+ if encodings :
5712+ for obj_num in sorted (self .objs ):
5713+ obj = self .objs [obj_num ]
5714+ head = obj .head
5715+ if (head .startswith ('<<' ) and
5716+ '/Font' in head and '/Type' in head and
5717+ '/Type1' in head and '/Subtype' in head and
5718+ '/FontDescriptor' in head and
5719+ obj .Get ('Type' ) == '/Font' and
5720+ obj .Get ('Subtype' ) == '/Type1' ):
5721+ match = obj .PDF_REF_AT_EOS_RE .match (str (obj .Get ('FontDescriptor' )))
5722+ if match :
5723+ fd_obj_num = int (match .group (1 )) # /Type/FontDescriptor.
5724+ if (fd_obj_num in encodings and
5725+ self .IsFontBuiltInEncodingUsed (
5726+ obj .ResolveReferences (obj .Get ('Encoding' ),
5727+ objs = self .objs )[0 ])):
5728+ obj .Set ('Encoding' , self .FormatEncoding (encodings [fd_obj_num ]))
5729+
56535730 return self
56545731
56555732 @classmethod
@@ -6070,7 +6147,8 @@ def _ProcessType1CFonts(self, type1c_objs, do_unify_fonts,
60706147 for obj_num in sorted (self .objs ):
60716148 obj = self .objs [obj_num ]
60726149 head = obj .head
6073- if ('/Font' in head and '/Type' in head and
6150+ if (head .startswith ('<<' ) and
6151+ '/Font' in head and '/Type' in head and
60746152 '/Type1' in head and '/Subtype' in head and
60756153 '/FontDescriptor' in head and
60766154 obj .Get ('Type' ) == '/Font' and
0 commit comments