Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Fix email header wrapping omitting white space
  • Loading branch information
robsdedude committed Feb 10, 2026
commit b5925e056e5e03f642926204872ac40ce8643dbf
78 changes: 46 additions & 32 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2835,6 +2835,30 @@ def _steal_trailing_WSP_if_exists(lines):
lines.pop()
return wsp

def _steal_all_trailing_WSP_if_exists(lines):
    """Remove and return all whitespace found at the end of *lines*.

    The run of trailing WSP characters is stripped from the last line.  If
    that leaves the line empty, the line is dropped and the scan continues
    with the line before it, so a whitespace run spanning several lines is
    collected as a whole.  The scan stops at the first non-WSP character,
    or when an empty line or an empty list is reached.

    *lines* is modified in place.  If at least one line was dropped, a new
    line holding a single space is appended before returning.

    Return the removed whitespace, concatenated in its original order (the
    empty string if there was none).
    """
    stolen = []
    dropped_a_line = False
    while lines and lines[-1]:
        current = lines[-1]
        # Walk backwards to the start of the trailing whitespace run.
        cut = len(current)
        while cut > 0 and current[cut - 1] in WSP:
            cut -= 1
        if cut == len(current):
            # The line ends in a non-WSP character: nothing left to steal.
            break
        stolen.append(current[cut:])
        lines[-1] = current[:cut]
        if cut:
            # Non-whitespace content remains on this line, so we are done.
            break
        # The line was whitespace only; drop it and look at the previous one.
        lines.pop()
        dropped_a_line = True

    if dropped_a_line:
        lines.append(' ')
    # Pieces were gathered from the last line backwards; restore the order.
    stolen.reverse()
    return ''.join(stolen)
Comment thread
robsdedude marked this conversation as resolved.
Outdated

def _refold_parse_tree(parse_tree, *, policy):
"""Return string of contents of parse_tree folded according to RFC rules.

Expand All @@ -2843,9 +2867,7 @@ def _refold_parse_tree(parse_tree, *, policy):
maxlen = policy.max_line_length or sys.maxsize
encoding = 'utf-8' if policy.utf8 else 'us-ascii'
lines = [''] # Folded lines to be output
leading_whitespace = '' # When we have whitespace between two encoded
# words, we may need to encode the whitespace
# at the beginning of the second word.
last_word_is_ew = False
last_ew = None # Points to the last encoded character if there's an ew on
Comment thread
robsdedude marked this conversation as resolved.
Outdated
# the line
last_charset = None
Expand Down Expand Up @@ -2882,6 +2904,7 @@ def _refold_parse_tree(parse_tree, *, policy):
if part.token_type == 'mime-parameters':
# Mime parameter folding (using RFC2231) is extra special.
_fold_mime_parameters(part, lines, maxlen, encoding)
last_word_is_ew = False
continue

if want_encoding and not wrap_as_ew_blocked:
Expand All @@ -2898,6 +2921,7 @@ def _refold_parse_tree(parse_tree, *, policy):
# XXX what if encoded_part has no leading FWS?
lines.append(newline)
lines[-1] += encoded_part
last_word_is_ew = False
continue
# Either this is not a major syntactic break, so we don't
# want it on a line by itself even if it fits, or it
Expand All @@ -2917,10 +2941,8 @@ def _refold_parse_tree(parse_tree, *, policy):
last_charset == 'utf-8' and charset != 'us-ascii')):
last_ew = None
last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
part.ew_combine_allowed, charset, leading_whitespace)
# This whitespace has been added to the lines in _fold_as_ew()
# so clear it now.
leading_whitespace = ''
part.ew_combine_allowed, charset, last_word_is_ew)
Comment thread
robsdedude marked this conversation as resolved.
Outdated
last_word_is_ew = True
last_charset = charset
want_encoding = False
continue
Expand All @@ -2933,28 +2955,20 @@ def _refold_parse_tree(parse_tree, *, policy):

if len(tstr) <= maxlen - len(lines[-1]):
lines[-1] += tstr
if any(char not in WSP for char in tstr):
last_word_is_ew = False
Comment thread
robsdedude marked this conversation as resolved.
Outdated
continue

# This part is too long to fit. The RFC wants us to break at
# "major syntactic breaks", so unless we don't consider this
# to be one, check if it will fit on the next line by itself.
leading_whitespace = ''
if (part.syntactic_break and
len(tstr) + 1 <= maxlen):
newline = _steal_trailing_WSP_if_exists(lines)
if newline or part.startswith_fws():
# We're going to fold the data onto a new line here. Due to
# the way encoded strings handle continuation lines, we need to
# be prepared to encode any whitespace if the next line turns
# out to start with an encoded word.
lines.append(newline + tstr)

whitespace_accumulator = []
for char in lines[-1]:
if char not in WSP:
break
whitespace_accumulator.append(char)
leading_whitespace = ''.join(whitespace_accumulator)
if not all(char in WSP for char in lines[-1]):
last_word_is_ew = False
Comment thread
robsdedude marked this conversation as resolved.
Outdated
last_ew = None
continue
if not hasattr(part, 'encode'):
Expand Down Expand Up @@ -2994,10 +3008,12 @@ def _refold_parse_tree(parse_tree, *, policy):
else:
# We can't fold it onto the next line either...
lines[-1] += tstr
if any(char not in WSP for char in tstr):
last_word_is_ew = False
Comment thread
robsdedude marked this conversation as resolved.
Outdated

return policy.linesep.join(lines) + policy.linesep

def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace):
def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew):
"""Fold string to_encode into lines as encoded word, combining if allowed.
Return the new value for last_ew, or None if ew_combine_allowed is False.

Expand All @@ -3012,14 +3028,22 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
to_encode = str(
get_unstructured(lines[-1][last_ew:] + to_encode))
lines[-1] = lines[-1][:last_ew]
elif to_encode[0] in WSP:
elif to_encode[0] in WSP and not last_word_is_ew:
# We're joining this to non-encoded text, so don't encode
# the leading blank.
leading_wsp = to_encode[0]
to_encode = to_encode[1:]
if (len(lines[-1]) == maxlen):
lines.append(_steal_trailing_WSP_if_exists(lines))
lines[-1] += leading_wsp
elif last_word_is_ew:
# If we are following up an encoded word with another encoded word,
# any white space between the two will be ignored when decoded.
# Therefore, we encode all to-be-displayed whitespace in the second
# encoded word.
leading_whitespace = _steal_all_trailing_WSP_if_exists(lines)
to_encode = leading_whitespace + to_encode
Comment thread
robsdedude marked this conversation as resolved.
lines[-1] = ' '

trailing_wsp = ''
if to_encode[-1] in WSP:
Expand All @@ -3040,20 +3064,11 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,

while to_encode:
remaining_space = maxlen - len(lines[-1])
text_space = remaining_space - chrome_len - len(leading_whitespace)
text_space = remaining_space - chrome_len
if text_space <= 0:
lines.append(' ')
continue
Comment thread
robsdedude marked this conversation as resolved.

# If we are at the start of a continuation line, prepend whitespace
# (we only want to do this when the line starts with an encoded word
# but if we're folding in this helper function, then we know that we
# are going to be writing out an encoded word.)
if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace:
encoded_word = _ew.encode(leading_whitespace, charset=encode_as)
lines[-1] += encoded_word
leading_whitespace = ''

to_encode_word = to_encode[:text_space]
encoded_word = _ew.encode(to_encode_word, charset=encode_as)
excess = len(encoded_word) - remaining_space
Expand All @@ -3065,7 +3080,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
excess = len(encoded_word) - remaining_space
lines[-1] += encoded_word
to_encode = to_encode[len(to_encode_word):]
leading_whitespace = ''

if to_encode:
lines.append(' ')
Expand Down
18 changes: 18 additions & 0 deletions Lib/test/test_email/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,24 @@ def test_defaults_handle_spaces_at_start_of_continuation_line(self):
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)

# gh-144156
# https://github.com/python/cpython/issues/144156
Comment thread
robsdedude marked this conversation as resolved.
Outdated
def test_defaults_handle_spaces_at_start_of_continuation_line_2(self):
    # A long subject that mixes plain ASCII words with words that have to
    # be written as encoded words, so folding has to happen next to them.
    source = ("Re: [SOS-1495488] Commande et livraison - Demande de retour - "
              "bibijolie - 251210-AABBCC - Abo actualités digitales 20 semaines "
              "d’abonnement à 24 heures, Bilan, Tribune de Genève et tous les titres Tamedia")
    # Every space of the source must still be present between the words
    # after folding, including around the encoded words.
    expected = (b"Subject: "
                b"Re: [SOS-1495488] Commande et livraison - Demande de retour -\n"
                b" bibijolie - 251210-AABBCC - Abo =?utf-8?q?actualit=C3=A9s?= digitales 20\n"
                b" semaines =?utf-8?q?d=E2=80=99abonnement_=C3=A0?= 24 heures, Bilan, Tribune de\n"
                b" =?utf-8?q?Gen=C3=A8ve?= et tous les titres Tamedia\n\n")
    message = EmailMessage()
    message['Subject'] = source
    output = io.BytesIO()
    BytesGenerator(output).flatten(message)
    self.assertEqual(output.getvalue(), expected)

Comment thread
robsdedude marked this conversation as resolved.
def test_cte_type_7bit_handles_unknown_8bit(self):
source = ("Subject: Maintenant je vous présente mon "
"collègue\n\n").encode('utf-8')
Expand Down
Loading