|
13 | 13 | import logging
|
14 | 14 | import math
|
15 | 15 | import os
|
16 |
| -import re |
17 | 16 | import string
|
18 | 17 | import struct
|
19 | 18 | import sys
|
@@ -119,25 +118,6 @@ def _fill(strings, linelen=75):
|
119 | 118 | result.append(b' '.join(strings[lasti:]))
|
120 | 119 | return b'\n'.join(result)
|
121 | 120 |
|
122 |
| -# PDF strings are supposed to be able to include any eight-bit data, |
123 |
| -# except that unbalanced parens and backslashes must be escaped by a |
124 |
| -# backslash. However, sf bug #2708559 shows that the carriage return |
125 |
| -# character may get read as a newline; these characters correspond to |
126 |
| -# \gamma and \Omega in TeX's math font encoding. Escaping them fixes |
127 |
| -# the bug. |
128 |
| -_string_escape_regex = re.compile(br'([\\()\r\n])') |
129 |
| - |
130 |
| - |
131 |
| -def _string_escape(match): |
132 |
| - m = match.group(0) |
133 |
| - if m in br'\()': |
134 |
| - return b'\\' + m |
135 |
| - elif m == b'\n': |
136 |
| - return br'\n' |
137 |
| - elif m == b'\r': |
138 |
| - return br'\r' |
139 |
| - assert False |
140 |
| - |
141 | 121 |
|
142 | 122 | def _create_pdf_info_dict(backend, metadata):
|
143 | 123 | """
|
@@ -250,6 +230,15 @@ def _datetime_to_pdf(d):
|
250 | 230 | return r
|
251 | 231 |
|
252 | 232 |
|
| 233 | +# PDF strings are supposed to be able to include any eight-bit data, except |
| 234 | +# that unbalanced parens and backslashes must be escaped by a backslash. |
| 235 | +# However, SourceForge bug #2708559 shows that a raw carriage return may get |
| 236 | +# read as a newline; CR and LF correspond to \gamma and \Omega in TeX's |
| 237 | +# math font encoding, so escaping both characters fixes the bug. |
| 238 | +_str_escapes = str.maketrans({ |
| 239 | + '\\': '\\\\', '(': '\\(', ')': '\\)', '\n': '\\n', '\r': '\\r'}) |
| 240 | + |
| 241 | + |
253 | 242 | def pdfRepr(obj):
|
254 | 243 | """Map Python objects to PDF syntax."""
|
255 | 244 |
|
@@ -289,8 +278,12 @@ def pdfRepr(obj):
|
289 | 278 | # escaped. Actually balanced parens are allowed, but it is
|
290 | 279 | # simpler to escape them all. TODO: cut long strings into lines;
|
291 | 280 | # I believe there is some maximum line length in PDF.
|
| 281 | + # Despite the extra decode/encode, translate is faster than regex. |
292 | 282 | elif isinstance(obj, bytes):
|
293 |
| - return b'(' + _string_escape_regex.sub(_string_escape, obj) + b')' |
| 283 | + return ( |
| 284 | + b'(' + |
| 285 | + obj.decode('latin-1').translate(_str_escapes).encode('latin-1') |
| 286 | + + b')') |
294 | 287 |
|
295 | 288 | # Dictionaries. The keys must be PDF names, so if we find strings
|
296 | 289 | # there, we make Name objects from them. The values may be
|
|
0 commit comments