Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit cded989

Browse files
committed
Emit Type 42 chars beyond BMP as XObjects
Currently, the CID maps only support 2-byte fixed-width characters, so Unicode code points beyond the Basic Multilingual Plane cannot be used. This commit follows the strategy taken for Type 3 fonts: any char with a code point > 65535 is emitted as an XObject.
1 parent a5fe8c9 commit cded989

File tree

1 file changed

+60
-20
lines changed

1 file changed

+60
-20
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,21 @@ def pdfRepr(obj):
321321
.format(type(obj)))
322322

323323

324+
def _font_supports_char(fonttype, char):
325+
"""
326+
Returns True if the font is able to provide the char in a PDF
327+
328+
For a Type 3 font, this method returns True only for single-byte
329+
chars. For Type 42 fonts this method returns True if the char is from
330+
the Basic Multilingual Plane.
331+
"""
332+
if fonttype == 3:
333+
return ord(char) <= 255
334+
if fonttype == 42:
335+
return ord(char) <= 65535
336+
return True
337+
338+
324339
class Reference:
325340
"""
326341
PDF reference object.
@@ -1194,6 +1209,8 @@ def embedTTFType42(font, characters, descriptor):
11941209
wObject = self.reserveObject('Type 0 widths')
11951210
toUnicodeMapObject = self.reserveObject('ToUnicode map')
11961211

1212+
bbox = [cvt(x, nearest=False) for x in font.bbox]
1213+
11971214
cidFontDict = {
11981215
'Type': Name('Font'),
11991216
'Subtype': Name('CIDFontType2'),
@@ -1268,13 +1285,47 @@ def embedTTFType42(font, characters, descriptor):
12681285

12691286
unicode_bfrange = []
12701287
for start, end in unicode_groups:
1288+
# Ensure the CID map contains only chars from BMP
1289+
if start > 65535:
1290+
continue
1291+
end = min(65535, end)
1292+
12711293
unicode_bfrange.append(
12721294
b"<%04x> <%04x> [%s]" %
12731295
(start, end,
12741296
b" ".join(b"<%04x>" % x for x in range(start, end+1))))
12751297
unicode_cmap = (self._identityToUnicodeCMap %
12761298
(len(unicode_groups), b"\n".join(unicode_bfrange)))
12771299

1300+
# Add XObjects for unsupported chars
1301+
glyph_ids = []
1302+
for ccode in characters:
1303+
if not _font_supports_char(fonttype, chr(ccode)):
1304+
gind = font.get_char_index(ccode)
1305+
glyph_ids.append(gind)
1306+
1307+
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1308+
for charname in sorted(rawcharprocs):
1309+
stream = rawcharprocs[charname]
1310+
charprocDict = {'Length': len(stream)}
1311+
charprocDict['Type'] = Name('XObject')
1312+
charprocDict['Subtype'] = Name('Form')
1313+
charprocDict['BBox'] = bbox
1314+
# Each glyph includes bounding box information,
1315+
# but xpdf and ghostscript can't handle it in a
1316+
# Form XObject (they segfault!!!), so we remove it
1317+
# from the stream here. It's not needed anyway,
1318+
# since the Form XObject includes it in its BBox
1319+
# value.
1320+
stream = stream[stream.find(b"d1") + 2:]
1321+
charprocObject = self.reserveObject('charProc')
1322+
self.beginStream(charprocObject.id, None, charprocDict)
1323+
self.currentstream.write(stream)
1324+
self.endStream()
1325+
1326+
name = self._get_xobject_symbol_name(filename, charname)
1327+
self.multi_byte_charprocs[name] = charprocObject
1328+
12781329
# CIDToGIDMap stream
12791330
cid_to_gid_map = "".join(cid_to_gid_map).encode("utf-16be")
12801331
self.beginStream(cidToGidMapObject.id,
@@ -2106,16 +2157,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062157
self.check_gc(gc, gc._rgb)
21072158
prev_font = None, None
21082159
oldx, oldy = 0, 0
2109-
type3_multibytes = []
2160+
unsupported_chars = []
21102161

21112162
self.file.output(Op.begin_text)
21122163
for font, fontsize, num, ox, oy in glyphs:
2113-
self.file._character_tracker.track(font, chr(num))
2164+
char = chr(num)
2165+
self.file._character_tracker.track(font, char)
21142166
fontname = font.fname
2115-
if fonttype == 3 and num > 255:
2116-
# For Type3 fonts, multibyte characters must be emitted
2117-
# separately (below).
2118-
type3_multibytes.append((font, fontsize, ox, oy, num))
2167+
if not _font_supports_char(fonttype, char):
2168+
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2169+
# Type 42) must be emitted separately (below).
2170+
unsupported_chars.append((font, fontsize, ox, oy, num))
21192171
else:
21202172
self._setup_textpos(ox, oy, 0, oldx, oldy)
21212173
oldx, oldy = ox, oy
@@ -2127,7 +2179,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272179
Op.show)
21282180
self.file.output(Op.end_text)
21292181

2130-
for font, fontsize, ox, oy, num in type3_multibytes:
2182+
for font, fontsize, ox, oy, num in unsupported_chars:
21312183
self._draw_xobject_glyph(
21322184
font, fontsize, font.get_char_index(num), ox, oy)
21332185

@@ -2236,18 +2288,6 @@ def encode_string(self, s, fonttype):
22362288
return s.encode('cp1252', 'replace')
22372289
return s.encode('utf-16be', 'replace')
22382290

2239-
@staticmethod
2240-
def _font_supports_char(fonttype, char):
2241-
"""
2242-
Returns True if the font is able to provide the char in a PDF
2243-
2244-
For a Type 3 font, this method returns True only for single-byte
2245-
chars. For Type 42 fonts this method always returns True.
2246-
"""
2247-
if fonttype == 3:
2248-
return ord(char) <= 255
2249-
return True
2250-
22512291
def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
22522292
# docstring inherited
22532293

@@ -2311,7 +2351,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23112351
prev_was_multibyte = True
23122352
for item in _text_helpers.layout(
23132353
s, font, kern_mode=KERNING_UNFITTED):
2314-
if self._font_supports_char(fonttype, item.char):
2354+
if _font_supports_char(fonttype, item.char):
23152355
if prev_was_multibyte:
23162356
singlebyte_chunks.append((item.x, []))
23172357
if item.prev_kern:

0 commit comments

Comments
 (0)