@@ -321,6 +321,21 @@ def pdfRepr(obj):
321
321
.format (type (obj )))
322
322
323
323
324
+ def _font_supports_char (fonttype , char ):
325
+ """
326
+ Returns True if the font is able to provide the char in a PDF.
327
+
328
+ For a Type 3 font, this function returns True only for single-byte
329
+ chars. For Type 42 fonts this function returns True if the char is from
330
+ the Basic Multilingual Plane.
331
+ """
332
+ if fonttype == 3 :
333
+ return ord (char ) <= 255
334
+ if fonttype == 42 :
335
+ return ord (char ) <= 65535
336
+ return True
337
+
338
+
324
339
class Reference :
325
340
"""
326
341
PDF reference object.
@@ -1194,6 +1209,8 @@ def embedTTFType42(font, characters, descriptor):
1194
1209
wObject = self .reserveObject ('Type 0 widths' )
1195
1210
toUnicodeMapObject = self .reserveObject ('ToUnicode map' )
1196
1211
1212
+ bbox = [cvt (x , nearest = False ) for x in font .bbox ]
1213
+
1197
1214
cidFontDict = {
1198
1215
'Type' : Name ('Font' ),
1199
1216
'Subtype' : Name ('CIDFontType2' ),
@@ -1268,13 +1285,47 @@ def embedTTFType42(font, characters, descriptor):
1268
1285
1269
1286
unicode_bfrange = []
1270
1287
for start , end in unicode_groups :
1288
+ # Ensure the CID map contains only chars from BMP
1289
+ if start > 65535 :
1290
+ continue
1291
+ end = min (65535 , end )
1292
+
1271
1293
unicode_bfrange .append (
1272
1294
b"<%04x> <%04x> [%s]" %
1273
1295
(start , end ,
1274
1296
b" " .join (b"<%04x>" % x for x in range (start , end + 1 ))))
1275
1297
unicode_cmap = (self ._identityToUnicodeCMap %
1276
1298
(len (unicode_groups ), b"\n " .join (unicode_bfrange )))
1277
1299
1300
+ # Add XObjects for unsupported chars
1301
+ glyph_ids = []
1302
+ for ccode in characters :
1303
+ if not _font_supports_char (fonttype , chr (ccode )):
1304
+ gind = font .get_char_index (ccode )
1305
+ glyph_ids .append (gind )
1306
+
1307
+ rawcharprocs = _get_pdf_charprocs (filename , glyph_ids )
1308
+ for charname in sorted (rawcharprocs ):
1309
+ stream = rawcharprocs [charname ]
1310
+ charprocDict = {'Length' : len (stream )}
1311
+ charprocDict ['Type' ] = Name ('XObject' )
1312
+ charprocDict ['Subtype' ] = Name ('Form' )
1313
+ charprocDict ['BBox' ] = bbox
1314
+ # Each glyph includes bounding box information,
1315
+ # but xpdf and ghostscript can't handle it in a
1316
+ # Form XObject (they segfault!!!), so we remove it
1317
+ # from the stream here. It's not needed anyway,
1318
+ # since the Form XObject includes it in its BBox
1319
+ # value.
1320
+ stream = stream [stream .find (b"d1" ) + 2 :]
1321
+ charprocObject = self .reserveObject ('charProc' )
1322
+ self .beginStream (charprocObject .id , None , charprocDict )
1323
+ self .currentstream .write (stream )
1324
+ self .endStream ()
1325
+
1326
+ name = self ._get_xobject_symbol_name (filename , charname )
1327
+ self .multi_byte_charprocs [name ] = charprocObject
1328
+
1278
1329
# CIDToGIDMap stream
1279
1330
cid_to_gid_map = "" .join (cid_to_gid_map ).encode ("utf-16be" )
1280
1331
self .beginStream (cidToGidMapObject .id ,
@@ -2106,16 +2157,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
2106
2157
self .check_gc (gc , gc ._rgb )
2107
2158
prev_font = None , None
2108
2159
oldx , oldy = 0 , 0
2109
- type3_multibytes = []
2160
+ unsupported_chars = []
2110
2161
2111
2162
self .file .output (Op .begin_text )
2112
2163
for font , fontsize , num , ox , oy in glyphs :
2113
- self .file ._character_tracker .track (font , chr (num ))
2164
+ char = chr (num )
2165
+ self .file ._character_tracker .track (font , char )
2114
2166
fontname = font .fname
2115
- if fonttype == 3 and num > 255 :
2116
- # For Type3 fonts, multibyte characters must be emitted
2117
- # separately (below).
2118
- type3_multibytes .append ((font , fontsize , ox , oy , num ))
2167
+ if not _font_supports_char ( fonttype , char ) :
2168
+ # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2169
+ # Type 42) must be emitted separately (below).
2170
+ unsupported_chars .append ((font , fontsize , ox , oy , num ))
2119
2171
else :
2120
2172
self ._setup_textpos (ox , oy , 0 , oldx , oldy )
2121
2173
oldx , oldy = ox , oy
@@ -2127,7 +2179,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
2127
2179
Op .show )
2128
2180
self .file .output (Op .end_text )
2129
2181
2130
- for font , fontsize , ox , oy , num in type3_multibytes :
2182
+ for font , fontsize , ox , oy , num in unsupported_chars :
2131
2183
self ._draw_xobject_glyph (
2132
2184
font , fontsize , font .get_char_index (num ), ox , oy )
2133
2185
@@ -2236,18 +2288,6 @@ def encode_string(self, s, fonttype):
2236
2288
return s .encode ('cp1252' , 'replace' )
2237
2289
return s .encode ('utf-16be' , 'replace' )
2238
2290
2239
- @staticmethod
2240
- def _font_supports_char (fonttype , char ):
2241
- """
2242
- Returns True if the font is able to provided the char in a PDF
2243
-
2244
- For a Type 3 font, this method returns True only for single-byte
2245
- chars. For Type 42 fonts this method always returns True.
2246
- """
2247
- if fonttype == 3 :
2248
- return ord (char ) <= 255
2249
- return True
2250
-
2251
2291
def draw_text (self , gc , x , y , s , prop , angle , ismath = False , mtext = None ):
2252
2292
# docstring inherited
2253
2293
@@ -2311,7 +2351,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
2311
2351
prev_was_multibyte = True
2312
2352
for item in _text_helpers .layout (
2313
2353
s , font , kern_mode = KERNING_UNFITTED ):
2314
- if self . _font_supports_char (fonttype , item .char ):
2354
+ if _font_supports_char (fonttype , item .char ):
2315
2355
if prev_was_multibyte :
2316
2356
singlebyte_chunks .append ((item .x , []))
2317
2357
if item .prev_kern :
0 commit comments