Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 675a7b2

Browse files
committed
Emit Type 42 chars beyond BMP as XObjects
Currently, the CID maps only support 2-byte fixed-width characters, so Unicode code points beyond the Basic Multilingual Plane cannot be used. This commit follows the strategy taken for Type 3 fonts: any char with a code point > 65535 is emitted as an XObject.
1 parent f980d13 commit 675a7b2

File tree

1 file changed

+59
-20
lines changed

1 file changed

+59
-20
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,20 @@ def pdfRepr(obj):
320320
raise TypeError("Don't know a PDF representation for {} objects"
321321
.format(type(obj)))
322322

323+
def _font_supports_char(fonttype, char):
324+
"""
325+
Returns True if the font is able to provide the char in a PDF.
326+
327+
For a Type 3 font, this function returns True only for single-byte
328+
chars. For Type 42 fonts this function returns True if the char is from
329+
the Basic Multilingual Plane.
330+
"""
331+
if fonttype == 3:
332+
return ord(char) <= 255
333+
if fonttype == 42:
334+
return ord(char) <= 65535
335+
return True
336+
323337

324338
class Reference:
325339
"""
@@ -1194,6 +1208,8 @@ def embedTTFType42(font, characters, descriptor):
11941208
wObject = self.reserveObject('Type 0 widths')
11951209
toUnicodeMapObject = self.reserveObject('ToUnicode map')
11961210

1211+
bbox = [cvt(x, nearest=False) for x in font.bbox]
1212+
11971213
cidFontDict = {
11981214
'Type': Name('Font'),
11991215
'Subtype': Name('CIDFontType2'),
@@ -1268,13 +1284,47 @@ def embedTTFType42(font, characters, descriptor):
12681284

12691285
unicode_bfrange = []
12701286
for start, end in unicode_groups:
1287+
# Ensure the CID map contains only chars from BMP
1288+
if start > 65535:
1289+
continue
1290+
end = min(65535, end)
1291+
12711292
unicode_bfrange.append(
12721293
b"<%04x> <%04x> [%s]" %
12731294
(start, end,
12741295
b" ".join(b"<%04x>" % x for x in range(start, end+1))))
12751296
unicode_cmap = (self._identityToUnicodeCMap %
12761297
(len(unicode_groups), b"\n".join(unicode_bfrange)))
12771298

1299+
# Add XObjects for unsupported chars
1300+
glyph_ids = []
1301+
for ccode in characters:
1302+
if not _font_supports_char(fonttype, chr(ccode)):
1303+
gind = font.get_char_index(ccode)
1304+
glyph_ids.append(gind)
1305+
1306+
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1307+
for charname in sorted(rawcharprocs):
1308+
stream = rawcharprocs[charname]
1309+
charprocDict = {'Length': len(stream)}
1310+
charprocDict['Type'] = Name('XObject')
1311+
charprocDict['Subtype'] = Name('Form')
1312+
charprocDict['BBox'] = bbox
1313+
# Each glyph includes bounding box information,
1314+
# but xpdf and ghostscript can't handle it in a
1315+
# Form XObject (they segfault!!!), so we remove it
1316+
# from the stream here. It's not needed anyway,
1317+
# since the Form XObject includes it in its BBox
1318+
# value.
1319+
stream = stream[stream.find(b"d1") + 2:]
1320+
charprocObject = self.reserveObject('charProc')
1321+
self.beginStream(charprocObject.id, None, charprocDict)
1322+
self.currentstream.write(stream)
1323+
self.endStream()
1324+
1325+
name = self._get_xobject_symbol_name(filename, charname)
1326+
self.multi_byte_charprocs[name] = charprocObject
1327+
12781328
# CIDToGIDMap stream
12791329
cid_to_gid_map = "".join(cid_to_gid_map).encode("utf-16be")
12801330
self.beginStream(cidToGidMapObject.id,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062156
self.check_gc(gc, gc._rgb)
21072157
prev_font = None, None
21082158
oldx, oldy = 0, 0
2109-
type3_multibytes = []
2159+
unsupported_chars = []
21102160

21112161
self.file.output(Op.begin_text)
21122162
for font, fontsize, num, ox, oy in glyphs:
2113-
self.file._character_tracker.track(font, chr(num))
2163+
char = chr(num)
2164+
self.file._character_tracker.track(font, char)
21142165
fontname = font.fname
2115-
if fonttype == 3 and num > 255:
2116-
# For Type3 fonts, multibyte characters must be emitted
2117-
# separately (below).
2118-
type3_multibytes.append((font, fontsize, ox, oy, num))
2166+
if not _font_supports_char(fonttype, char):
2167+
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168+
# Type 42) must be emitted separately (below).
2169+
unsupported_chars.append((font, fontsize, ox, oy, num))
21192170
else:
21202171
self._setup_textpos(ox, oy, 0, oldx, oldy)
21212172
oldx, oldy = ox, oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272178
Op.show)
21282179
self.file.output(Op.end_text)
21292180

2130-
for font, fontsize, ox, oy, num in type3_multibytes:
2181+
for font, fontsize, ox, oy, num in unsupported_chars:
21312182
self._draw_xobject_glyph(
21322183
font, fontsize, font.get_char_index(num), ox, oy)
21332184

@@ -2236,18 +2287,6 @@ def encode_string(self, s, fonttype):
22362287
return s.encode('cp1252', 'replace')
22372288
return s.encode('utf-16be', 'replace')
22382289

2239-
@staticmethod
2240-
def _font_supports_char(fonttype, char):
2241-
"""
2242-
Returns True if the font is able to provided the char in a PDF
2243-
2244-
For a Type 3 font, this method returns True only for single-byte
2245-
chars. For Type 42 fonts this method always returns True.
2246-
"""
2247-
if fonttype == 3:
2248-
return ord(char) <= 255
2249-
return True
2250-
22512290
def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
22522291
# docstring inherited
22532292

@@ -2311,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23112350
prev_was_multibyte = True
23122351
for item in _text_helpers.layout(
23132352
s, font, kern_mode=KERNING_UNFITTED):
2314-
if self._font_supports_char(fonttype, item.char):
2353+
if _font_supports_char(fonttype, item.char):
23152354
if prev_was_multibyte:
23162355
singlebyte_chunks.append((item.x, []))
23172356
if item.prev_kern:

0 commit comments

Comments
 (0)