@@ -321,6 +321,21 @@ def pdfRepr(obj):
321
321
.format (type (obj )))
322
322
323
323
324
+ def _font_supports_char (fonttype , char ):
325
+ """
326
+ Returns True if the font is able to provide *char* in a PDF.
327
+
328
+ For a Type 3 font, this method returns True only for single-byte
329
+ chars. For Type 42 fonts this method return True if the char is from
330
+ the Basic Multilingual Plane.
331
+ """
332
+ if fonttype == 3 :
333
+ return ord (char ) <= 255
334
+ if fonttype == 42 :
335
+ return ord (char ) <= 65535
336
+ raise NotImplementedError ()
337
+
338
+
324
339
class Reference :
325
340
"""
326
341
PDF reference object.
@@ -1268,13 +1283,48 @@ def embedTTFType42(font, characters, descriptor):
1268
1283
1269
1284
unicode_bfrange = []
1270
1285
for start , end in unicode_groups :
1286
+ # Ensure the CID map contains only chars from BMP
1287
+ if start > 65535 :
1288
+ continue
1289
+ end = min (65535 , end )
1290
+
1271
1291
unicode_bfrange .append (
1272
1292
b"<%04x> <%04x> [%s]" %
1273
1293
(start , end ,
1274
1294
b" " .join (b"<%04x>" % x for x in range (start , end + 1 ))))
1275
1295
unicode_cmap = (self ._identityToUnicodeCMap %
1276
1296
(len (unicode_groups ), b"\n " .join (unicode_bfrange )))
1277
1297
1298
+ # Add XObjects for unsupported chars
1299
+ glyph_ids = []
1300
+ for ccode in characters :
1301
+ if not _font_supports_char (fonttype , chr (ccode )):
1302
+ gind = font .get_char_index (ccode )
1303
+ glyph_ids .append (gind )
1304
+
1305
+ bbox = [cvt (x , nearest = False ) for x in font .bbox ]
1306
+ rawcharprocs = _get_pdf_charprocs (filename , glyph_ids )
1307
+ for charname in sorted (rawcharprocs ):
1308
+ stream = rawcharprocs [charname ]
1309
+ charprocDict = {'Length' : len (stream )}
1310
+ charprocDict ['Type' ] = Name ('XObject' )
1311
+ charprocDict ['Subtype' ] = Name ('Form' )
1312
+ charprocDict ['BBox' ] = bbox
1313
+ # Each glyph includes bounding box information,
1314
+ # but xpdf and ghostscript can't handle it in a
1315
+ # Form XObject (they segfault!!!), so we remove it
1316
+ # from the stream here. It's not needed anyway,
1317
+ # since the Form XObject includes it in its BBox
1318
+ # value.
1319
+ stream = stream [stream .find (b"d1" ) + 2 :]
1320
+ charprocObject = self .reserveObject ('charProc' )
1321
+ self .beginStream (charprocObject .id , None , charprocDict )
1322
+ self .currentstream .write (stream )
1323
+ self .endStream ()
1324
+
1325
+ name = self ._get_xobject_symbol_name (filename , charname )
1326
+ self .multi_byte_charprocs [name ] = charprocObject
1327
+
1278
1328
# CIDToGIDMap stream
1279
1329
cid_to_gid_map = "" .join (cid_to_gid_map ).encode ("utf-16be" )
1280
1330
self .beginStream (cidToGidMapObject .id ,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
2106
2156
self .check_gc (gc , gc ._rgb )
2107
2157
prev_font = None , None
2108
2158
oldx , oldy = 0 , 0
2109
- type3_multibytes = []
2159
+ unsupported_chars = []
2110
2160
2111
2161
self .file .output (Op .begin_text )
2112
2162
for font , fontsize , num , ox , oy in glyphs :
2113
- self .file ._character_tracker .track (font , chr (num ))
2163
+ char = chr (num )
2164
+ self .file ._character_tracker .track (font , char )
2114
2165
fontname = font .fname
2115
- if fonttype == 3 and num > 255 :
2116
- # For Type3 fonts, multibyte characters must be emitted
2117
- # separately (below).
2118
- type3_multibytes .append ((font , fontsize , ox , oy , num ))
2166
+ if not _font_supports_char ( fonttype , char ) :
2167
+ # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168
+ # Type 42) must be emitted separately (below).
2169
+ unsupported_chars .append ((font , fontsize , ox , oy , num ))
2119
2170
else :
2120
2171
self ._setup_textpos (ox , oy , 0 , oldx , oldy )
2121
2172
oldx , oldy = ox , oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
2127
2178
Op .show )
2128
2179
self .file .output (Op .end_text )
2129
2180
2130
- for font , fontsize , ox , oy , num in type3_multibytes :
2181
+ for font , fontsize , ox , oy , num in unsupported_chars :
2131
2182
self ._draw_xobject_glyph (
2132
2183
font , fontsize , font .get_char_index (num ), ox , oy )
2133
2184
@@ -2236,20 +2287,6 @@ def encode_string(self, s, fonttype):
2236
2287
return s .encode ('cp1252' , 'replace' )
2237
2288
return s .encode ('utf-16be' , 'replace' )
2238
2289
2239
- @staticmethod
2240
- def _font_supports_char (fonttype , char ):
2241
- """
2242
- Returns True if the font is able to provided the char in a PDF
2243
-
2244
- For a Type 3 font, this method returns True only for single-byte
2245
- chars. For Type 42 fonts this method always returns True.
2246
- """
2247
- if fonttype == 3 :
2248
- return ord (char ) <= 255
2249
- if fonttype == 42 :
2250
- return True
2251
- raise NotImplementedError ()
2252
-
2253
2290
def draw_text (self , gc , x , y , s , prop , angle , ismath = False , mtext = None ):
2254
2291
# docstring inherited
2255
2292
@@ -2313,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
2313
2350
prev_was_multibyte = True
2314
2351
for item in _text_helpers .layout (
2315
2352
s , font , kern_mode = KERNING_UNFITTED ):
2316
- if self . _font_supports_char (fonttype , item .char ):
2353
+ if _font_supports_char (fonttype , item .char ):
2317
2354
if prev_was_multibyte :
2318
2355
singlebyte_chunks .append ((item .x , []))
2319
2356
if item .prev_kern :
0 commit comments