Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit bd127f3

Browse files
authored
Merge pull request #20633 from sauerburger/pdf-non-bmp-chars
Emit non BMP chars as XObjects in PDF
2 parents 1a8caa5 + a4067a0 commit bd127f3

File tree

3 files changed

+67
-22
lines changed

3 files changed

+67
-22
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,21 @@ def pdfRepr(obj):
321321
.format(type(obj)))
322322

323323

324+
def _font_supports_char(fonttype, char):
325+
"""
326+
Returns True if the font is able to provide *char* in a PDF.
327+
328+
For a Type 3 font, this method returns True only for single-byte
329+
chars. For Type 42 fonts this method return True if the char is from
330+
the Basic Multilingual Plane.
331+
"""
332+
if fonttype == 3:
333+
return ord(char) <= 255
334+
if fonttype == 42:
335+
return ord(char) <= 65535
336+
raise NotImplementedError()
337+
338+
324339
class Reference:
325340
"""
326341
PDF reference object.
@@ -1268,13 +1283,48 @@ def embedTTFType42(font, characters, descriptor):
12681283

12691284
unicode_bfrange = []
12701285
for start, end in unicode_groups:
1286+
# Ensure the CID map contains only chars from BMP
1287+
if start > 65535:
1288+
continue
1289+
end = min(65535, end)
1290+
12711291
unicode_bfrange.append(
12721292
b"<%04x> <%04x> [%s]" %
12731293
(start, end,
12741294
b" ".join(b"<%04x>" % x for x in range(start, end+1))))
12751295
unicode_cmap = (self._identityToUnicodeCMap %
12761296
(len(unicode_groups), b"\n".join(unicode_bfrange)))
12771297

1298+
# Add XObjects for unsupported chars
1299+
glyph_ids = []
1300+
for ccode in characters:
1301+
if not _font_supports_char(fonttype, chr(ccode)):
1302+
gind = font.get_char_index(ccode)
1303+
glyph_ids.append(gind)
1304+
1305+
bbox = [cvt(x, nearest=False) for x in font.bbox]
1306+
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1307+
for charname in sorted(rawcharprocs):
1308+
stream = rawcharprocs[charname]
1309+
charprocDict = {'Length': len(stream)}
1310+
charprocDict['Type'] = Name('XObject')
1311+
charprocDict['Subtype'] = Name('Form')
1312+
charprocDict['BBox'] = bbox
1313+
# Each glyph includes bounding box information,
1314+
# but xpdf and ghostscript can't handle it in a
1315+
# Form XObject (they segfault!!!), so we remove it
1316+
# from the stream here. It's not needed anyway,
1317+
# since the Form XObject includes it in its BBox
1318+
# value.
1319+
stream = stream[stream.find(b"d1") + 2:]
1320+
charprocObject = self.reserveObject('charProc')
1321+
self.beginStream(charprocObject.id, None, charprocDict)
1322+
self.currentstream.write(stream)
1323+
self.endStream()
1324+
1325+
name = self._get_xobject_symbol_name(filename, charname)
1326+
self.multi_byte_charprocs[name] = charprocObject
1327+
12781328
# CIDToGIDMap stream
12791329
cid_to_gid_map = "".join(cid_to_gid_map).encode("utf-16be")
12801330
self.beginStream(cidToGidMapObject.id,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062156
self.check_gc(gc, gc._rgb)
21072157
prev_font = None, None
21082158
oldx, oldy = 0, 0
2109-
type3_multibytes = []
2159+
unsupported_chars = []
21102160

21112161
self.file.output(Op.begin_text)
21122162
for font, fontsize, num, ox, oy in glyphs:
2113-
self.file._character_tracker.track(font, chr(num))
2163+
char = chr(num)
2164+
self.file._character_tracker.track(font, char)
21142165
fontname = font.fname
2115-
if fonttype == 3 and num > 255:
2116-
# For Type3 fonts, multibyte characters must be emitted
2117-
# separately (below).
2118-
type3_multibytes.append((font, fontsize, ox, oy, num))
2166+
if not _font_supports_char(fonttype, char):
2167+
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168+
# Type 42) must be emitted separately (below).
2169+
unsupported_chars.append((font, fontsize, ox, oy, num))
21192170
else:
21202171
self._setup_textpos(ox, oy, 0, oldx, oldy)
21212172
oldx, oldy = ox, oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272178
Op.show)
21282179
self.file.output(Op.end_text)
21292180

2130-
for font, fontsize, ox, oy, num in type3_multibytes:
2181+
for font, fontsize, ox, oy, num in unsupported_chars:
21312182
self._draw_xobject_glyph(
21322183
font, fontsize, font.get_char_index(num), ox, oy)
21332184

@@ -2236,20 +2287,6 @@ def encode_string(self, s, fonttype):
22362287
return s.encode('cp1252', 'replace')
22372288
return s.encode('utf-16be', 'replace')
22382289

2239-
@staticmethod
2240-
def _font_supports_char(fonttype, char):
2241-
"""
2242-
Returns True if the font is able to provide the char in a PDF
2243-
2244-
For a Type 3 font, this method returns True only for single-byte
2245-
chars. For Type 42 fonts this method always returns True.
2246-
"""
2247-
if fonttype == 3:
2248-
return ord(char) <= 255
2249-
if fonttype == 42:
2250-
return True
2251-
raise NotImplementedError()
2252-
22532290
def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
22542291
# docstring inherited
22552292

@@ -2313,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23132350
prev_was_multibyte = True
23142351
for item in _text_helpers.layout(
23152352
s, font, kern_mode=KERNING_UNFITTED):
2316-
if self._font_supports_char(fonttype, item.char):
2353+
if _font_supports_char(fonttype, item.char):
23172354
if prev_was_multibyte:
23182355
singlebyte_chunks.append((item.x, []))
23192356
if item.prev_kern:
Binary file not shown.

lib/matplotlib/tests/test_text.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,3 +748,11 @@ def test_pdf_font42_kerning():
748748
plt.rcParams['pdf.fonttype'] = 42
749749
plt.figure()
750750
plt.figtext(0.1, 0.5, "ATAVATAVATAVATAVATA", size=30)
751+
752+
753+
@image_comparison(['text_pdf_chars_beyond_bmp.pdf'], style='mpl20')
def test_pdf_chars_beyond_bmp():
    """
    Draw figure text containing a char beyond the BMP with Type 42 fonts.

    The text mixes plain text, a mathtext fragment and U+10308; the result
    is checked through the ``image_comparison`` decorator.
    """
    plt.rcParams.update({
        'pdf.fonttype': 42,
        'mathtext.fontset': 'stixsans',
    })
    plt.figure()
    label = "Mass $m$ \U00010308"
    plt.figtext(0.1, 0.5, label, size=30)

0 commit comments

Comments
 (0)