Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3df688e

Browse files
ezequielp-activestate authored and icanhasmath committed
Make parseaddr and getaddresses return Unicode when given Unicode input
1 parent 6f55a24 commit 3df688e

3 files changed

Lines changed: 80 additions & 32 deletions

File tree

Lib/email/test/test_email.py

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2322,16 +2322,19 @@ def test_parseaddr_multiple_domains(self):
23222322

23232323
def test_parseaddr_unicode(self):
23242324
"""Test parseaddr with unicode strings"""
2325-
23262325
test_cases = [
2327-
2328-
u'Test User <[email protected]>',
2329-
u'"Test User" <[email protected]>',
2326+
2327+
(u'Test User <[email protected]>', (u'Test User', u'[email protected]')),
2328+
(u'"Test User" <[email protected]>', (u'Test User', u'[email protected]')),
23302329
]
23312330

2332-
for addr in test_cases:
2331+
for addr, expected in test_cases:
23332332
result = Utils.parseaddr(addr, strict=True)
2334-
self.assertNotEqual(result, ('', ''))
2333+
self.assertEqual(result, expected)
2334+
if result[0]:
2335+
self.assertIsInstance(result[0], unicode)
2336+
if result[1]:
2337+
self.assertIsInstance(result[1], unicode)
23352338

23362339
result_non_strict = Utils.parseaddr(addr, strict=False)
23372340
self.assertEqual(result, result_non_strict)
@@ -2449,18 +2452,22 @@ def test_getaddresses_embedded_comment(self):
24492452
eq(addrs[0][1], '[email protected]')
24502453

24512454
def test_getaddresses_unicode(self):
2452-
"""Test getaddresses with unicode strings in Python 2"""
2453-
2455+
"""Test getaddresses with unicode strings"""
24542456
test_cases = [
2455-
2456-
([u'Test User <[email protected]>'], [('Test User', '[email protected]')]),
2457-
([u'"Test User" <[email protected]>'], [('Test User', '[email protected]')]),
2458-
2457+
2458+
([u'Test User <[email protected]>'], [(u'Test User', u'[email protected]')]),
2459+
([u'"Test User" <[email protected]>'], [(u'Test User', u'[email protected]')]),
2460+
24592461
]
24602462

24612463
for addrs, expected in test_cases:
24622464
result = Utils.getaddresses(addrs)
24632465
self.assertEqual(result, expected)
2466+
for realname, email in result:
2467+
if realname:
2468+
self.assertIsInstance(realname, unicode)
2469+
if email:
2470+
self.assertIsInstance(email, unicode)
24642471

24652472
def test_make_msgid_collisions(self):
24662473
# Test make_msgid uniqueness, even with multiple threads

Lib/email/test/test_email_renamed.py

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2201,16 +2201,19 @@ def test_parseaddr_empty(self):
22012201

22022202
def test_parseaddr_unicode(self):
22032203
"""Test parseaddr with unicode strings"""
2204-
22052204
test_cases = [
2206-
2207-
u'Test User <[email protected]>',
2208-
u'"Test User" <[email protected]>',
2205+
2206+
(u'Test User <[email protected]>', (u'Test User', u'[email protected]')),
2207+
(u'"Test User" <[email protected]>', (u'Test User', u'[email protected]')),
22092208
]
22102209

2211-
for addr in test_cases:
2210+
for addr, expected in test_cases:
22122211
result = utils.parseaddr(addr, strict=True)
2213-
self.assertNotEqual(result, ('', ''))
2212+
self.assertEqual(result, expected)
2213+
if result[0]:
2214+
self.assertIsInstance(result[0], unicode)
2215+
if result[1]:
2216+
self.assertIsInstance(result[1], unicode)
22142217

22152218
result_non_strict = utils.parseaddr(addr, strict=False)
22162219
self.assertEqual(result, result_non_strict)
@@ -2310,18 +2313,22 @@ def test_getaddresses_embedded_comment(self):
23102313
eq(addrs[0][1], '[email protected]')
23112314

23122315
def test_getaddresses_unicode(self):
2313-
"""Test getaddresses with unicode strings in Python 2"""
2314-
2316+
"""Test getaddresses with unicode strings"""
23152317
test_cases = [
2316-
2317-
([u'Test User <[email protected]>'], [('Test User', '[email protected]')]),
2318-
([u'"Test User" <[email protected]>'], [('Test User', '[email protected]')]),
2319-
2318+
2319+
([u'Test User <[email protected]>'], [(u'Test User', u'[email protected]')]),
2320+
([u'"Test User" <[email protected]>'], [(u'Test User', u'[email protected]')]),
2321+
23202322
]
23212323

23222324
for addrs, expected in test_cases:
23232325
result = utils.getaddresses(addrs)
23242326
self.assertEqual(result, expected)
2327+
for realname, email in result:
2328+
if realname:
2329+
self.assertIsInstance(realname, unicode)
2330+
if email:
2331+
self.assertIsInstance(email, unicode)
23252332

23262333
def test__quote_unquote(self):
23272334
eq = self.assertEqual

Lib/email/utils.py

Lines changed: 42 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -162,16 +162,19 @@ def getaddresses(fieldvalues, strict=True):
162162
a = _AddressList(all)
163163
return a.addresslist
164164

165+
unicode_flags = []
165166
converted_values = []
166167
for v in fieldvalues:
167-
if isinstance(v, unicode):
168+
is_unicode = isinstance(v, unicode)
169+
unicode_flags.append(is_unicode)
170+
171+
if is_unicode:
168172
v = v.encode('utf-8')
169173
elif not isinstance(v, str):
170174
v = str(v)
171175
converted_values.append(v)
172176

173-
fieldvalues = converted_values
174-
fieldvalues = _pre_parse_validation(fieldvalues)
177+
fieldvalues = _pre_parse_validation(converted_values)
175178
addr = COMMASPACE.join(fieldvalues)
176179
a = _AddressList(addr)
177180
result = _post_parse_validation(a.addresslist)
@@ -188,7 +191,29 @@ def getaddresses(fieldvalues, strict=True):
188191
if len(result) != n:
189192
return [('', '')]
190193

191-
return result
194+
final_result = []
195+
result_idx = 0
196+
197+
for i, was_unicode in enumerate(unicode_flags):
198+
if result_idx >= len(result):
199+
break
200+
201+
realname, email = result[result_idx]
202+
203+
if was_unicode:
204+
if realname:
205+
realname = realname.decode('utf-8')
206+
if email:
207+
email = email.decode('utf-8')
208+
209+
final_result.append((realname, email))
210+
result_idx += 1
211+
212+
while result_idx < len(result):
213+
final_result.append(result[result_idx])
214+
result_idx += 1
215+
216+
return final_result
192217

193218

194219
def _check_parenthesis(addr):
@@ -347,12 +372,12 @@ def parseaddr(addr, strict=True):
347372
if isinstance(addr, list):
348373
addr = addr[0]
349374

350-
# FIX: Support both str and unicode in Python 2
375+
is_unicode = isinstance(addr, unicode)
376+
351377
if not isinstance(addr, (str, unicode)):
352378
return ('', '')
353379

354-
# Convert unicode to str for consistent processing
355-
if isinstance(addr, unicode):
380+
if is_unicode:
356381
addr = addr.encode('utf-8')
357382

358383
addr = _pre_parse_validation([addr])[0]
@@ -361,8 +386,17 @@ def parseaddr(addr, strict=True):
361386
if not addrs or len(addrs) > 1:
362387
return ('', '')
363388

364-
return addrs[0]
389+
result = addrs[0]
365390

391+
if is_unicode:
392+
realname, email = result
393+
if realname:
394+
realname = realname.decode('utf-8')
395+
if email:
396+
email = email.decode('utf-8')
397+
return (realname, email)
398+
399+
return result
366400

367401

368402
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.

0 commit comments

Comments
 (0)