Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c4bb599

Browse files
committed
Merge: #23745: handle duplicate MIME parameter names in new parser.
2 parents 520bddf + 7d0325d commit c4bb599

3 files changed

Lines changed: 139 additions & 7 deletions

File tree

Lib/email/_header_value_parser.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
import urllib # For urllib.parse.unquote
7272
from string import hexdigits
7373
from collections import OrderedDict
74+
from operator import itemgetter
7475
from email import _encoded_words as _ew
7576
from email import errors
7677
from email import utils
@@ -1098,15 +1099,34 @@ def params(self):
10981099
params[name] = []
10991100
params[name].append((token.section_number, token))
11001101
for name, parts in params.items():
1101-
parts = sorted(parts)
1102-
# XXX: there might be more recovery we could do here if, for
1103-
# example, this is really a case of a duplicate attribute name.
1102+
parts = sorted(parts, key=itemgetter(0))
1103+
first_param = parts[0][1]
1104+
charset = first_param.charset
1105+
# Our arbitrary error recovery is to ignore duplicate parameters,
1106+
# to use appearance order if there are duplicate rfc 2231 parts,
1107+
# and to ignore gaps. This mimics the error recovery of get_param.
1108+
if not first_param.extended and len(parts) > 1:
1109+
if parts[1][0] == 0:
1110+
parts[1][1].defects.append(errors.InvalidHeaderDefect(
1111+
'duplicate parameter name; duplicate(s) ignored'))
1112+
parts = parts[:1]
1113+
# Else assume the *0* was missing...note that this is different
1114+
# from get_param, but we registered a defect for this earlier.
11041115
value_parts = []
1105-
charset = parts[0][1].charset
1106-
for i, (section_number, param) in enumerate(parts):
1116+
i = 0
1117+
for section_number, param in parts:
11071118
if section_number != i:
1108-
param.defects.append(errors.InvalidHeaderDefect(
1109-
"inconsistent multipart parameter numbering"))
1119+
# We could get fancier here and look for a complete
1120+
# duplicate extended parameter and ignore the second one
1121+
# seen. But we're not doing that. The old code didn't.
1122+
if not param.extended:
1123+
param.defects.append(errors.InvalidHeaderDefect(
1124+
'duplicate parameter name; duplicate ignored'))
1125+
continue
1126+
else:
1127+
param.defects.append(errors.InvalidHeaderDefect(
1128+
"inconsistent RFC2231 parameter numbering"))
1129+
i += 1
11101130
value = param.param_value
11111131
if param.extended:
11121132
try:

Lib/test/test_email/test__header_value_parser.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2456,6 +2456,115 @@ def test_invalid_content_transfer_encoding(self):
24562456
";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3
24572457
)
24582458

2459+
2460+
@parameterize
2461+
class Test_parse_mime_parameters(TestParserMixin, TestEmailBase):
2462+
2463+
def mime_parameters_as_value(self,
2464+
value,
2465+
tl_str,
2466+
tl_value,
2467+
params,
2468+
defects):
2469+
mime_parameters = self._test_parse_x(parser.parse_mime_parameters,
2470+
value, tl_str, tl_value, defects)
2471+
self.assertEqual(mime_parameters.token_type, 'mime-parameters')
2472+
self.assertEqual(list(mime_parameters.params), params)
2473+
2474+
2475+
mime_parameters_params = {
2476+
2477+
'simple': (
2478+
'filename="abc.py"',
2479+
' filename="abc.py"',
2480+
'filename=abc.py',
2481+
[('filename', 'abc.py')],
2482+
[]),
2483+
2484+
'multiple_keys': (
2485+
'filename="abc.py"; xyz=abc',
2486+
' filename="abc.py"; xyz="abc"',
2487+
'filename=abc.py; xyz=abc',
2488+
[('filename', 'abc.py'), ('xyz', 'abc')],
2489+
[]),
2490+
2491+
'split_value': (
2492+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
2493+
' filename="201.tif"',
2494+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
2495+
[('filename', '201.tif')],
2496+
[]),
2497+
2498+
# Note that it is undefined what we should do for error recovery when
2499+
# there are duplicate parameter names or duplicate parts in a split
2500+
# part. We choose to ignore all duplicate parameters after the first
2501+
# and to take duplicate or missing rfc 2231 parts in appearance order.
2502+
# This is backward compatible with get_param's behavior, but the
2503+
# decisions are arbitrary.
2504+
2505+
'duplicate_key': (
2506+
'filename=abc.gif; filename=def.tiff',
2507+
' filename="abc.gif"',
2508+
"filename=abc.gif; filename=def.tiff",
2509+
[('filename', 'abc.gif')],
2510+
[errors.InvalidHeaderDefect]),
2511+
2512+
'duplicate_key_with_split_value': (
2513+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
2514+
" filename=abc.gif",
2515+
' filename="201.tif"',
2516+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
2517+
" filename=abc.gif",
2518+
[('filename', '201.tif')],
2519+
[errors.InvalidHeaderDefect]),
2520+
2521+
'duplicate_key_with_split_value_other_order': (
2522+
"filename=abc.gif; "
2523+
" filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
2524+
' filename="abc.gif"',
2525+
"filename=abc.gif;"
2526+
" filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
2527+
[('filename', 'abc.gif')],
2528+
[errors.InvalidHeaderDefect]),
2529+
2530+
'duplicate_in_split_value': (
2531+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
2532+
" filename*1*=abc.gif",
2533+
' filename="201.tifabc.gif"',
2534+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
2535+
" filename*1*=abc.gif",
2536+
[('filename', '201.tifabc.gif')],
2537+
[errors.InvalidHeaderDefect]),
2538+
2539+
'missing_split_value': (
2540+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
2541+
' filename="201.tif"',
2542+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
2543+
[('filename', '201.tif')],
2544+
[errors.InvalidHeaderDefect]),
2545+
2546+
'duplicate_and_missing_split_value': (
2547+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
2548+
" filename*3*=abc.gif",
2549+
' filename="201.tifabc.gif"',
2550+
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
2551+
" filename*3*=abc.gif",
2552+
[('filename', '201.tifabc.gif')],
2553+
[errors.InvalidHeaderDefect]*2),
2554+
2555+
# Here we depart from get_param and assume the *0* was missing.
2556+
'duplicate_with_broken_split_value': (
2557+
"filename=abc.gif; "
2558+
" filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
2559+
' filename="abc.gif201.tif"',
2560+
"filename=abc.gif;"
2561+
" filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
2562+
[('filename', 'abc.gif201.tif')],
2563+
# Defects are apparent missing *0*, and two 'out of sequence'.
2564+
[errors.InvalidHeaderDefect]*3),
2565+
2566+
}
2567+
24592568
@parameterize
24602569
class Test_parse_mime_version(TestParserMixin, TestEmailBase):
24612570

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ Core and Builtins
5656
Library
5757
-------
5858

59+
- Issue #23745: The new email header parser now handles duplicate MIME
60+
parameter names without error, similar to how get_param behaves.
61+
5962
- Issue #22117: Fix os.utime(), it now rounds the timestamp towards minus
6063
infinity (-inf) instead of rounding towards zero.
6164

0 commit comments

Comments
 (0)