@@ -320,6 +320,20 @@ def pdfRepr(obj):
320
320
raise TypeError ("Don't know a PDF representation for {} objects"
321
321
.format (type (obj )))
322
322
323
+ def _font_supports_char (fonttype , char ):
324
+ """
325
+ Returns True if the font is able to provided the char in a PDF
326
+
327
+ For a Type 3 font, this method returns True only for single-byte
328
+ chars. For Type 42 fonts this method return True if the char is from
329
+ the Basic Multilingual Plane.
330
+ """
331
+ if fonttype == 3 :
332
+ return ord (char ) <= 255
333
+ if fonttype == 42 :
334
+ return ord (char ) <= 65535
335
+ return True
336
+
323
337
324
338
class Reference :
325
339
"""
@@ -1194,6 +1208,8 @@ def embedTTFType42(font, characters, descriptor):
1194
1208
wObject = self .reserveObject ('Type 0 widths' )
1195
1209
toUnicodeMapObject = self .reserveObject ('ToUnicode map' )
1196
1210
1211
+ bbox = [cvt (x , nearest = False ) for x in font .bbox ]
1212
+
1197
1213
cidFontDict = {
1198
1214
'Type' : Name ('Font' ),
1199
1215
'Subtype' : Name ('CIDFontType2' ),
@@ -1268,13 +1284,47 @@ def embedTTFType42(font, characters, descriptor):
1268
1284
1269
1285
unicode_bfrange = []
1270
1286
for start , end in unicode_groups :
1287
+ # Ensure the CID map contains only chars from BMP
1288
+ if start > 65535 :
1289
+ continue
1290
+ end = min (65535 , end )
1291
+
1271
1292
unicode_bfrange .append (
1272
1293
b"<%04x> <%04x> [%s]" %
1273
1294
(start , end ,
1274
1295
b" " .join (b"<%04x>" % x for x in range (start , end + 1 ))))
1275
1296
unicode_cmap = (self ._identityToUnicodeCMap %
1276
1297
(len (unicode_groups ), b"\n " .join (unicode_bfrange )))
1277
1298
1299
+ # Add XObjects for unsupported chars
1300
+ glyph_ids = []
1301
+ for ccode in characters :
1302
+ if not _font_supports_char (fonttype , chr (ccode )):
1303
+ gind = font .get_char_index (ccode )
1304
+ glyph_ids .append (gind )
1305
+
1306
+ rawcharprocs = _get_pdf_charprocs (filename , glyph_ids )
1307
+ for charname in sorted (rawcharprocs ):
1308
+ stream = rawcharprocs [charname ]
1309
+ charprocDict = {'Length' : len (stream )}
1310
+ charprocDict ['Type' ] = Name ('XObject' )
1311
+ charprocDict ['Subtype' ] = Name ('Form' )
1312
+ charprocDict ['BBox' ] = bbox
1313
+ # Each glyph includes bounding box information,
1314
+ # but xpdf and ghostscript can't handle it in a
1315
+ # Form XObject (they segfault!!!), so we remove it
1316
+ # from the stream here. It's not needed anyway,
1317
+ # since the Form XObject includes it in its BBox
1318
+ # value.
1319
+ stream = stream [stream .find (b"d1" ) + 2 :]
1320
+ charprocObject = self .reserveObject ('charProc' )
1321
+ self .beginStream (charprocObject .id , None , charprocDict )
1322
+ self .currentstream .write (stream )
1323
+ self .endStream ()
1324
+
1325
+ name = self ._get_xobject_symbol_name (filename , charname )
1326
+ self .multi_byte_charprocs [name ] = charprocObject
1327
+
1278
1328
# CIDToGIDMap stream
1279
1329
cid_to_gid_map = "" .join (cid_to_gid_map ).encode ("utf-16be" )
1280
1330
self .beginStream (cidToGidMapObject .id ,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
2106
2156
self .check_gc (gc , gc ._rgb )
2107
2157
prev_font = None , None
2108
2158
oldx , oldy = 0 , 0
2109
- type3_multibytes = []
2159
+ unsupported_chars = []
2110
2160
2111
2161
self .file .output (Op .begin_text )
2112
2162
for font , fontsize , num , ox , oy in glyphs :
2113
- self .file ._character_tracker .track (font , chr (num ))
2163
+ char = chr (num )
2164
+ self .file ._character_tracker .track (font , char )
2114
2165
fontname = font .fname
2115
- if fonttype == 3 and num > 255 :
2116
- # For Type3 fonts, multibyte characters must be emitted
2117
- # separately (below).
2118
- type3_multibytes .append ((font , fontsize , ox , oy , num ))
2166
+ if not _font_supports_char ( fonttype , char ) :
2167
+ # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168
+ # Type 42) must be emitted separately (below).
2169
+ unsupported_chars .append ((font , fontsize , ox , oy , num ))
2119
2170
else :
2120
2171
self ._setup_textpos (ox , oy , 0 , oldx , oldy )
2121
2172
oldx , oldy = ox , oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
2127
2178
Op .show )
2128
2179
self .file .output (Op .end_text )
2129
2180
2130
- for font , fontsize , ox , oy , num in type3_multibytes :
2181
+ for font , fontsize , ox , oy , num in unsupported_chars :
2131
2182
self ._draw_xobject_glyph (
2132
2183
font , fontsize , font .get_char_index (num ), ox , oy )
2133
2184
@@ -2236,18 +2287,6 @@ def encode_string(self, s, fonttype):
2236
2287
return s .encode ('cp1252' , 'replace' )
2237
2288
return s .encode ('utf-16be' , 'replace' )
2238
2289
2239
- @staticmethod
2240
- def _font_supports_char (fonttype , char ):
2241
- """
2242
- Returns True if the font is able to provided the char in a PDF
2243
-
2244
- For a Type 3 font, this method returns True only for single-byte
2245
- chars. For Type 42 fonts this method always returns True.
2246
- """
2247
- if fonttype == 3 :
2248
- return ord (char ) <= 255
2249
- return True
2250
-
2251
2290
def draw_text (self , gc , x , y , s , prop , angle , ismath = False , mtext = None ):
2252
2291
# docstring inherited
2253
2292
@@ -2311,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
2311
2350
prev_was_multibyte = True
2312
2351
for item in _text_helpers .layout (
2313
2352
s , font , kern_mode = KERNING_UNFITTED ):
2314
- if self . _font_supports_char (fonttype , item .char ):
2353
+ if _font_supports_char (fonttype , item .char ):
2315
2354
if prev_was_multibyte :
2316
2355
singlebyte_chunks .append ((item .x , []))
2317
2356
if item .prev_kern :
0 commit comments