Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2313e15

Browse files
committed
#20206, #5803: more efficient algorithm that doesn't truncate output.
This fixes an edge case (20206) where if the input ended in a character needing encoding but there was no newline on the string, the last byte of the encoded character would be dropped. The fix is to use a more efficient algorithm, provided by Serhiy Storchaka (5803), that does not have the bug.
1 parent 2a3d7d1 commit 2313e15

3 files changed

Lines changed: 69 additions & 82 deletions

File tree

Lib/email/quoprimime.py

Lines changed: 60 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,9 @@
5353
# space-wise. Remember that headers and bodies have different sets of safe
5454
# characters. Initialize both maps with the full expansion, and then override
5555
# the safe bytes with the more compact form.
56-
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
57-
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
56+
_QUOPRI_MAP = ['=%02X' % c for c in range(256)]
57+
_QUOPRI_HEADER_MAP = _QUOPRI_MAP[:]
58+
_QUOPRI_BODY_MAP = _QUOPRI_MAP[:]
5859

5960
# Safe header bytes which need no encoding.
6061
for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
@@ -121,8 +122,7 @@ def unquote(s):
121122

122123

123124
def quote(c):
124-
return '=%02X' % ord(c)
125-
125+
return _QUOPRI_MAP[ord(c)]
126126

127127

128128
def header_encode(header_bytes, charset='iso-8859-1'):
@@ -140,68 +140,16 @@ def header_encode(header_bytes, charset='iso-8859-1'):
140140
if not header_bytes:
141141
return ''
142142
# Iterate over every byte, encoding if necessary.
143-
encoded = []
144-
for octet in header_bytes:
145-
encoded.append(_QUOPRI_HEADER_MAP[octet])
143+
encoded = header_bytes.decode('latin1').translate(_QUOPRI_HEADER_MAP)
146144
# Now add the RFC chrome to each encoded chunk and glue the chunks
147145
# together.
148-
return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
149-
150-
151-
class _body_accumulator(io.StringIO):
152-
153-
def __init__(self, maxlinelen, eol, *args, **kw):
154-
super().__init__(*args, **kw)
155-
self.eol = eol
156-
self.maxlinelen = self.room = maxlinelen
157-
158-
def write_str(self, s):
159-
"""Add string s to the accumulated body."""
160-
self.write(s)
161-
self.room -= len(s)
162-
163-
def newline(self):
164-
"""Write eol, then start new line."""
165-
self.write_str(self.eol)
166-
self.room = self.maxlinelen
167-
168-
def write_soft_break(self):
169-
"""Write a soft break, then start a new line."""
170-
self.write_str('=')
171-
self.newline()
172-
173-
def write_wrapped(self, s, extra_room=0):
174-
"""Add a soft line break if needed, then write s."""
175-
if self.room < len(s) + extra_room:
176-
self.write_soft_break()
177-
self.write_str(s)
178-
179-
def write_char(self, c, is_last_char):
180-
if not is_last_char:
181-
# Another character follows on this line, so we must leave
182-
# extra room, either for it or a soft break, and whitespace
183-
# need not be quoted.
184-
self.write_wrapped(c, extra_room=1)
185-
elif c not in ' \t':
186-
# For this and remaining cases, no more characters follow,
187-
# so there is no need to reserve extra room (since a hard
188-
# break will immediately follow).
189-
self.write_wrapped(c)
190-
elif self.room >= 3:
191-
# It's a whitespace character at end-of-line, and we have room
192-
# for the three-character quoted encoding.
193-
self.write(quote(c))
194-
elif self.room == 2:
195-
# There's room for the whitespace character and a soft break.
196-
self.write(c)
197-
self.write_soft_break()
198-
else:
199-
# There's room only for a soft break. The quoted whitespace
200-
# will be the only content on the subsequent line.
201-
self.write_soft_break()
202-
self.write(quote(c))
146+
return '=?%s?q?%s?=' % (charset, encoded)
203147

204148

149+
_QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:]
150+
for c in b'\r\n':
151+
_QUOPRI_BODY_ENCODE_MAP[c] = chr(c)
152+
205153
def body_encode(body, maxlinelen=76, eol=NL):
206154
"""Encode with quoted-printable, wrapping at maxlinelen characters.
207155
@@ -226,26 +174,56 @@ def body_encode(body, maxlinelen=76, eol=NL):
226174
if not body:
227175
return body
228176

229-
# The last line may or may not end in eol, but all other lines do.
230-
last_has_eol = (body[-1] in '\r\n')
231-
232-
# This accumulator will make it easier to build the encoded body.
233-
encoded_body = _body_accumulator(maxlinelen, eol)
234-
235-
lines = body.splitlines()
236-
last_line_no = len(lines) - 1
237-
for line_no, line in enumerate(lines):
238-
last_char_index = len(line) - 1
239-
for i, c in enumerate(line):
240-
if body_check(ord(c)):
241-
c = quote(c)
242-
encoded_body.write_char(c, i==last_char_index)
243-
# Add an eol if input line had eol. All input lines have eol except
244-
# possibly the last one.
245-
if line_no < last_line_no or last_has_eol:
246-
encoded_body.newline()
247-
248-
return encoded_body.getvalue()
177+
# quote special characters
178+
body = body.translate(_QUOPRI_BODY_ENCODE_MAP)
179+
180+
soft_break = '=' + eol
181+
# leave space for the '=' at the end of a line
182+
maxlinelen1 = maxlinelen - 1
183+
184+
encoded_body = []
185+
append = encoded_body.append
186+
187+
for line in body.splitlines():
188+
# break up the line into pieces no longer than maxlinelen - 1
189+
start = 0
190+
laststart = len(line) - 1 - maxlinelen
191+
while start <= laststart:
192+
stop = start + maxlinelen1
193+
# make sure we don't break up an escape sequence
194+
if line[stop - 2] == '=':
195+
append(line[start:stop - 1])
196+
start = stop - 2
197+
elif line[stop - 1] == '=':
198+
append(line[start:stop])
199+
start = stop - 1
200+
else:
201+
append(line[start:stop] + '=')
202+
start = stop
203+
204+
# handle rest of line, special case if line ends in whitespace
205+
if line and line[-1] in ' \t':
206+
room = start - laststart
207+
if room >= 3:
208+
# It's a whitespace character at end-of-line, and we have room
209+
# for the three-character quoted encoding.
210+
q = quote(line[-1])
211+
elif room == 2:
212+
# There's room for the whitespace character and a soft break.
213+
q = line[-1] + soft_break
214+
else:
215+
# There's room only for a soft break. The quoted whitespace
216+
# will be the only content on the subsequent line.
217+
q = soft_break + quote(line[-1])
218+
append(line[start:-1] + q)
219+
else:
220+
append(line[start:])
221+
222+
# add back final newline if present
223+
if body[-1] in CRLF:
224+
append('')
225+
226+
return eol.join(encoded_body)
249227

250228

251229

Lib/test/test_email/test_email.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4216,6 +4216,11 @@ def test_encode_one_line_crlf(self):
42164216
def test_encode_one_line_eol(self):
42174217
self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
42184218

4219+
def test_encode_one_line_eol_after_non_ascii(self):
4220+
# issue 20206; see changeset 0cf700464177 for why the encode/decode.
4221+
self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4222+
'hello=CF=85\r\n', eol='\r\n')
4223+
42194224
def test_encode_one_space(self):
42204225
self._test_encode(' ', '=20')
42214226

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ Core and Builtins
4343
Library
4444
-------
4545

46+
- Issues #20206 and #5803: Fix edge case in email.quoprimime.encode where it
47+
truncated lines ending in a character needing encoding but no newline by
48+
using a more efficient algorithm that doesn't have the bug.
49+
4650
- Issue #19082: Working xmlrpc.server and xmlrpc.client examples. Both in
4751
modules and in documentation. Initial patch contributed by Vajrasky Kok.
4852

0 commit comments

Comments
 (0)