Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 63563cd

Browse files
committed
#9286: Fix the rfc822 parser to preserve whitespace in address local part.
Such addresses are not RFC compliant except under the 'obsolete syntax' rules, but before this fix the whitespace was dropped from the input, concatenating the pieces. That breaks one of the principles of the email package, that of preserving the input as much as possible. It also denies the application program the opportunity to apply its own heuristics to interpretation of such non-compliant addresses. It is possible users of the email package were depending on the local part always being a single token, so this fix will not be backported.
1 parent 2b37ce7 commit 63563cd

3 files changed

Lines changed: 35 additions & 2 deletions

File tree

Lib/email/_parseaddr.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,18 @@ def __init__(self, field):
199199
self.commentlist = []
200200

201201
def gotonext(self):
202-
"""Parse up to the start of the next address."""
202+
"""Skip white space and extract comments."""
203+
wslist = []
203204
while self.pos < len(self.field):
204205
if self.field[self.pos] in self.LWS + '\n\r':
206+
if self.field[self.pos] not in '\n\r':
207+
wslist.append(self.field[self.pos])
205208
self.pos += 1
206209
elif self.field[self.pos] == '(':
207210
self.commentlist.append(self.getcomment())
208211
else:
209212
break
213+
return EMPTYSTRING.join(wslist)
210214

211215
def getaddrlist(self):
212216
"""Parse all addresses.
@@ -319,16 +323,24 @@ def getaddrspec(self):
319323

320324
self.gotonext()
321325
while self.pos < len(self.field):
326+
preserve_ws = True
322327
if self.field[self.pos] == '.':
328+
if aslist and not aslist[-1].strip():
329+
aslist.pop()
323330
aslist.append('.')
324331
self.pos += 1
332+
preserve_ws = False
325333
elif self.field[self.pos] == '"':
326334
aslist.append('"%s"' % quote(self.getquote()))
327335
elif self.field[self.pos] in self.atomends:
336+
if aslist and not aslist[-1].strip():
337+
aslist.pop()
328338
break
329339
else:
330340
aslist.append(self.getatom())
331-
self.gotonext()
341+
ws = self.gotonext()
342+
if preserve_ws and ws:
343+
aslist.append(ws)
332344

333345
if self.pos >= len(self.field) or self.field[self.pos] != '@':
334346
return EMPTYSTRING.join(aslist)

Lib/email/test/test_email.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2342,6 +2342,24 @@ def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
23422342
eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
23432343
('', '"\\\\"example\\\\" example"@example.com'))
23442344

2345+
def test_parseaddr_preserves_spaces_in_local_part(self):
2346+
# issue 9286. A normal RFC5322 local part should not contain any
2347+
# folding white space, but legacy local parts can (they are a sequence
2348+
# of atoms, not dotatoms). On the other hand we strip whitespace from
2349+
# before the @ and around dots, on the assumption that the whitespace
2350+
# around the punctuation is a mistake in what would otherwise be
2351+
# an RFC5322 local part. Leading whitespace is, as usual, stripped as well.
2352+
self.assertEqual(('', "merwok wok@xample.com"),
2353+
utils.parseaddr("merwok wok@xample.com"))
2354+
self.assertEqual(('', "merwok wok@xample.com"),
2355+
utils.parseaddr("merwok wok@xample.com"))
2356+
self.assertEqual(('', "merwok wok@xample.com"),
2357+
utils.parseaddr(" merwok wok @xample.com"))
2358+
self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2359+
utils.parseaddr('merwok"wok" wok@xample.com'))
2360+
self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2361+
utils.parseaddr('merwok. wok . wok@xample.com'))
2362+
23452363
def test_multiline_from_comment(self):
23462364
x = """\
23472365
Foo

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ Core and Builtins
2323
Library
2424
-------
2525

26+
- Issue #9286: email.utils.parseaddr no longer concatenates blank-separated
27+
words in the local part of email addresses, thereby preserving the input.
28+
2629
- Issue #6791: Limit header line length (to 65535 bytes) in http.client
2730
and http.server, to avoid denial of services from the other party.
2831

0 commit comments

Comments
 (0)