Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 80e0aee

Browse files
committed
#1672568: email now registers defects for base64 payload format errors.
Which also means that it is now producing *something* for any base64 payload, which is what leads to the couple of older test changes in test_email. This is a slightly backward incompatible behavior change, but the new behavior is so much more useful than the old (you can now *reliably* detect errors, and any program that was detecting errors by sniffing for a base64 return from get_payload(decode=True) and then doing its own error-recovery decode will just get the error-recovery decode right away). So this seems to me to be worth the small risk inherent in this behavior change. This patch also refactors the defect tests into a separate test file, since they are no longer just parser tests.
1 parent adbdcdb commit 80e0aee

6 files changed

Lines changed: 344 additions & 275 deletions

File tree

Doc/library/email.errors.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,10 @@ this class is *not* an exception!
9696
this defect, its :meth:`is_multipart` method may return false even though its
9797
content type claims to be :mimetype:`multipart`.
9898

99+
* :class:`InvalidBase64PaddingDefect` -- When decoding a block of base64
100+
encoded bytes, the padding was not correct. Enough padding is added to
101+
perform the decode, but the resulting decoded bytes may be invalid.
102+
103+
* :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
104+
encoded bytes, characters outside the base64 alphabet were encountered.
105+
The characters are ignored, but the resulting decoded bytes may be invalid.

Doc/library/email.message.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,14 @@ Here are the methods of the :class:`Message` class:
111111
header. When ``True`` and the message is not a multipart, the payload will
112112
be decoded if this header's value is ``quoted-printable`` or ``base64``.
113113
If some other encoding is used, or :mailheader:`Content-Transfer-Encoding`
114-
header is missing, or if the payload has bogus base64 data, the payload is
114+
header is missing, the payload is
115115
returned as-is (undecoded). In all cases the returned value is binary
116116
data. If the message is a multipart and the *decode* flag is ``True``,
117-
then ``None`` is returned.
117+
then ``None`` is returned. If the payload is base64 and it was not
118+
perfectly formed (missing padding, characters outside the base64
119+
alphabet), then an appropriate defect will be added to the message's
120+
defect property (:class:`~email.errors.InvalidBase64PaddingDefect` or
121+
:class:`~email.errors.InvalidBase64CharactersDefect`, respectively).
118122

119123
When *decode* is ``False`` (the default) the body is returned as a string
120124
without decoding the :mailheader:`Content-Transfer-Encoding`. However,

Lib/email/message.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from email import errors
1818
from email._policybase import compat32
1919
from email import charset as _charset
20+
from email._encoded_words import decode_b
2021
Charset = _charset.Charset
2122

2223
SEMISPACE = '; '
@@ -249,11 +250,12 @@ def get_payload(self, i=None, decode=False):
249250
if cte == 'quoted-printable':
250251
return utils._qdecode(bpayload)
251252
elif cte == 'base64':
252-
try:
253-
return base64.b64decode(bpayload)
254-
except binascii.Error:
255-
# Incorrect padding
256-
return bpayload
253+
# XXX: this is a bit of a hack; decode_b should probably be factored
254+
# out somewhere, but I haven't figured out where yet.
255+
value, defects = decode_b(b''.join(bpayload.splitlines()))
256+
for defect in defects:
257+
self.policy.handle_defect(self, defect)
258+
return value
257259
elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
258260
in_file = BytesIO(bpayload)
259261
out_file = BytesIO()
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
import textwrap
2+
import unittest
3+
from email._policybase import Compat32
4+
from email import errors
5+
from test.test_email import TestEmailBase
6+
7+
8+
class TestMessageDefectDetectionBase:
9+
10+
dup_boundary_msg = textwrap.dedent("""\
11+
Subject: XX
12+
13+
To: XX
14+
Mime-version: 1.0
15+
Content-type: multipart/mixed;
16+
boundary="MS_Mac_OE_3071477847_720252_MIME_Part"
17+
18+
--MS_Mac_OE_3071477847_720252_MIME_Part
19+
Content-type: multipart/alternative;
20+
boundary="MS_Mac_OE_3071477847_720252_MIME_Part"
21+
22+
--MS_Mac_OE_3071477847_720252_MIME_Part
23+
Content-type: text/plain; charset="ISO-8859-1"
24+
Content-transfer-encoding: quoted-printable
25+
26+
text
27+
28+
--MS_Mac_OE_3071477847_720252_MIME_Part
29+
Content-type: text/html; charset="ISO-8859-1"
30+
Content-transfer-encoding: quoted-printable
31+
32+
<HTML></HTML>
33+
34+
--MS_Mac_OE_3071477847_720252_MIME_Part--
35+
36+
--MS_Mac_OE_3071477847_720252_MIME_Part
37+
Content-type: image/gif; name="xx.gif";
38+
Content-disposition: attachment
39+
Content-transfer-encoding: base64
40+
41+
Some removed base64 encoded chars.
42+
43+
--MS_Mac_OE_3071477847_720252_MIME_Part--
44+
45+
""")
46+
47+
def test_same_boundary_inner_outer(self):
    # The inner multipart part reuses the outer part's boundary string; the
    # inner part is expected to carry a single StartBoundaryNotFoundDefect.
    # XXX better would be to actually detect the duplicate.
    msg = self._str_msg(self.dup_boundary_msg)
    inner = msg.get_payload(0)
    self.assertTrue(hasattr(inner, 'defects'))
    defects = self.get_defects(inner)
    self.assertEqual(len(defects), 1)
    # assertIsInstance shows the offending object when it fails, which
    # assertTrue(isinstance(...)) does not.
    self.assertIsInstance(defects[0], errors.StartBoundaryNotFoundDefect)
55+
56+
def test_same_boundary_inner_outer_raises_on_defect(self):
    # Same input as the non-raising variant, but the policy is cloned with
    # raise_on_defect=True so the defect must surface as an exception
    # while parsing.
    strict_policy = self.policy.clone(raise_on_defect=True)
    with self.assertRaises(errors.StartBoundaryNotFoundDefect):
        self._str_msg(self.dup_boundary_msg, policy=strict_policy)
60+
61+
no_boundary_msg = textwrap.dedent("""\
62+
Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
63+
From: foobar
64+
Subject: broken mail
65+
MIME-Version: 1.0
66+
Content-Type: multipart/report; report-type=delivery-status;
67+
68+
--JAB03225.986577786/zinfandel.lacita.com
69+
70+
One part
71+
72+
--JAB03225.986577786/zinfandel.lacita.com
73+
Content-Type: message/delivery-status
74+
75+
Header: Another part
76+
77+
--JAB03225.986577786/zinfandel.lacita.com--
78+
""")
79+
80+
def test_multipart_no_boundary(self):
    # The Content-Type claims multipart but supplies no boundary parameter.
    # The payload is expected to stay a plain string, with two defects
    # registered in the order checked below.
    msg = self._str_msg(self.no_boundary_msg)
    # assertIsInstance shows the offending object when it fails, which
    # assertTrue(isinstance(...)) does not.
    self.assertIsInstance(msg.get_payload(), str)
    defects = self.get_defects(msg)
    self.assertEqual(len(defects), 2)
    self.assertIsInstance(defects[0], errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(defects[1],
                          errors.MultipartInvariantViolationDefect)
88+
89+
def test_multipart_no_boundary_raise_on_defect(self):
    # With raise_on_defect=True, NoBoundaryInMultipartDefect must be raised
    # while parsing the boundary-less multipart message.
    strict_policy = self.policy.clone(raise_on_defect=True)
    with self.assertRaises(errors.NoBoundaryInMultipartDefect):
        self._str_msg(self.no_boundary_msg, policy=strict_policy)
93+
94+
multipart_msg = textwrap.dedent("""\
95+
Date: Wed, 14 Nov 2007 12:56:23 GMT
96+
97+
98+
Subject: Content-Transfer-Encoding: base64 and multipart
99+
MIME-Version: 1.0
100+
Content-Type: multipart/mixed;
101+
boundary="===============3344438784458119861=="{}
102+
103+
--===============3344438784458119861==
104+
Content-Type: text/plain
105+
106+
Test message
107+
108+
--===============3344438784458119861==
109+
Content-Type: application/octet-stream
110+
Content-Transfer-Encoding: base64
111+
112+
YWJj
113+
114+
--===============3344438784458119861==--
115+
""")
116+
117+
def test_multipart_invalid_cte(self):
    # Inject a base64 Content-Transfer-Encoding header onto the multipart
    # container itself; exactly one defect of the dedicated type must be
    # registered.
    injected_header = "\nContent-Transfer-Encoding: base64"
    msg = self._str_msg(self.multipart_msg.format(injected_header))
    defects = self.get_defects(msg)
    self.assertEqual(len(defects), 1)
    self.assertIsInstance(
        defects[0], errors.InvalidMultipartContentTransferEncodingDefect)
123+
124+
def test_multipart_invalid_cte_raise_on_defect(self):
    # Same malformed message as test_multipart_invalid_cte, parsed under a
    # policy that raises on defects instead of recording them.
    source = self.multipart_msg.format("\nContent-Transfer-Encoding: base64")
    strict_policy = self.policy.clone(raise_on_defect=True)
    expected = errors.InvalidMultipartContentTransferEncodingDefect
    with self.assertRaises(expected):
        self._str_msg(source, policy=strict_policy)
131+
132+
def test_multipart_no_cte_no_defect(self):
    # The template is formatted with an empty string, i.e. no
    # Content-Transfer-Encoding header is added to the multipart container;
    # no defect may be registered.
    msg = self._str_msg(self.multipart_msg.format(''))
    defect_count = len(self.get_defects(msg))
    self.assertEqual(defect_count, 0)
135+
136+
def test_multipart_valid_cte_no_defect(self):
    # Each of these encodings (note the mixed-case 'BINary') must parse
    # without registering a defect; the failing encoding is named in the
    # assertion message.
    for encoding in ('7bit', '8bit', 'BINary'):
        header = "\nContent-Transfer-Encoding: " + encoding
        msg = self._str_msg(self.multipart_msg.format(header))
        defect_count = len(self.get_defects(msg))
        self.assertEqual(defect_count, 0, "cte=" + encoding)
141+
142+
lying_multipart_msg = textwrap.dedent("""\
143+
From: "Allison Dunlap" <[email protected]>
144+
145+
Subject: 64423
146+
Date: Sun, 11 Jul 2004 16:09:27 -0300
147+
MIME-Version: 1.0
148+
Content-Type: multipart/alternative;
149+
150+
Blah blah blah
151+
""")
152+
153+
def test_lying_multipart(self):
    # The message declares multipart/alternative but its body is plain
    # text; two defects are expected, in the order checked below.
    msg = self._str_msg(self.lying_multipart_msg)
    self.assertTrue(hasattr(msg, 'defects'))
    defects = self.get_defects(msg)
    self.assertEqual(len(defects), 2)
    # assertIsInstance shows the offending object when it fails, which
    # assertTrue(isinstance(...)) does not.
    self.assertIsInstance(defects[0], errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(defects[1],
                          errors.MultipartInvariantViolationDefect)
161+
162+
def test_lying_multipart_raise_on_defect(self):
    # Under raise_on_defect=True, NoBoundaryInMultipartDefect must be raised
    # while parsing the lying multipart message.
    strict_policy = self.policy.clone(raise_on_defect=True)
    with self.assertRaises(errors.NoBoundaryInMultipartDefect):
        self._str_msg(self.lying_multipart_msg, policy=strict_policy)
166+
167+
missing_start_boundary_msg = textwrap.dedent("""\
168+
Content-Type: multipart/mixed; boundary="AAA"
169+
From: Mail Delivery Subsystem <[email protected]>
170+
171+
172+
--AAA
173+
174+
Stuff
175+
176+
--AAA
177+
Content-Type: message/rfc822
178+
179+
180+
181+
Content-Type: multipart/mixed; boundary="BBB"
182+
183+
--BBB--
184+
185+
--AAA--
186+
187+
""")
188+
189+
def test_missing_start_boundary(self):
    # The message structure is:
    #
    # multipart/mixed
    #    text/plain
    #    message/rfc822
    #        multipart/mixed [*]
    #
    # [*] This message is missing its start boundary
    outer = self._str_msg(self.missing_start_boundary_msg)
    # Second part of the outer message, then the message nested inside it.
    bad = outer.get_payload(1).get_payload(0)
    defects = self.get_defects(bad)
    self.assertEqual(len(defects), 1)
    # assertIsInstance shows the offending object when it fails, which
    # assertTrue(isinstance(...)) does not.
    self.assertIsInstance(defects[0], errors.StartBoundaryNotFoundDefect)
203+
204+
def test_missing_start_boundary_raise_on_defect(self):
    # Under raise_on_defect=True, StartBoundaryNotFoundDefect must be raised
    # while parsing the message whose nested multipart lacks a start
    # boundary.
    strict_policy = self.policy.clone(raise_on_defect=True)
    with self.assertRaises(errors.StartBoundaryNotFoundDefect):
        self._str_msg(self.missing_start_boundary_msg, policy=strict_policy)
208+
209+
def test_first_line_is_continuation_header(self):
    # The input starts with a whitespace-led line, i.e. a header
    # continuation with no header to continue. Only 'Subject' survives as a
    # header and the stray line is reported on the defect's `line`
    # attribute.
    raw = ' Line 1\nSubject: test\n\nbody'
    msg = self._str_msg(raw)
    self.assertEqual(msg.keys(), ['Subject'])
    self.assertEqual(msg.get_payload(), 'body')
    defects = self.get_defects(msg)
    self.assertEqual(len(defects), 1)
    self.assertDefectsEqual(defects,
                            [errors.FirstHeaderLineIsContinuationDefect])
    self.assertEqual(defects[0].line, ' Line 1\n')
217+
218+
def test_first_line_is_continuation_header_raise_on_defect(self):
    # Under raise_on_defect=True the leading continuation line must raise
    # FirstHeaderLineIsContinuationDefect while parsing.
    raw = ' Line 1\nSubject: test\n\nbody\n'
    strict_policy = self.policy.clone(raise_on_defect=True)
    with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
        self._str_msg(raw, policy=strict_policy)
222+
223+
def test_missing_header_body_separator(self):
    # Heuristic under test: a line that does not look like a header (no
    # leading whitespace, but no ':' either) is taken to mean the blank
    # header/body separator is missing, so header parsing stops there and
    # everything from that line on is body.
    raw = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
    msg = self._str_msg(raw)
    self.assertEqual(msg.keys(), ['Subject'])
    self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
    defects = self.get_defects(msg)
    self.assertDefectsEqual(defects,
                            [errors.MissingHeaderBodySeparatorDefect])
233+
234+
def test_missing_header_body_separator_raise_on_defect(self):
    # Under raise_on_defect=True the non-header line must raise
    # MissingHeaderBodySeparatorDefect while parsing.
    raw = 'Subject: test\nnot a header\nTo: abc\n\nb\n'
    strict_policy = self.policy.clone(raise_on_defect=True)
    with self.assertRaises(errors.MissingHeaderBodySeparatorDefect):
        self._str_msg(raw, policy=strict_policy)
238+
239+
badly_padded_base64_payload = textwrap.dedent("""\
240+
Subject: test
241+
MIME-Version: 1.0
242+
Content-Type: text/plain; charset="utf-8"
243+
Content-Transfer-Encoding: base64
244+
245+
dmk
246+
""")
247+
248+
def test_bad_padding_in_base64_payload(self):
    # The fixture body 'dmk' is base64 without its padding. Decoding must
    # still produce b'vi' and record a padding defect. The payload is
    # decoded before the defects are read because the defect is registered
    # by get_payload(decode=True).
    msg = self._str_msg(self.badly_padded_base64_payload)
    decoded = msg.get_payload(decode=True)
    self.assertEqual(decoded, b'vi')
    defects = self.get_defects(msg)
    self.assertDefectsEqual(defects, [errors.InvalidBase64PaddingDefect])
253+
254+
def test_bad_padding_in_base64_payload_raise_on_defect(self):
    # Parsing happens outside the assertRaises block: the padding defect
    # is expected only when the payload is decoded, not at parse time.
    strict_policy = self.policy.clone(raise_on_defect=True)
    msg = self._str_msg(self.badly_padded_base64_payload,
                        policy=strict_policy)
    with self.assertRaises(errors.InvalidBase64PaddingDefect):
        msg.get_payload(decode=True)
259+
260+
invalid_chars_in_base64_payload = textwrap.dedent("""\
261+
Subject: test
262+
MIME-Version: 1.0
263+
Content-Type: text/plain; charset="utf-8"
264+
Content-Transfer-Encoding: base64
265+
266+
dm\x01k===
267+
""")
268+
269+
def test_invalid_chars_in_base64_payload(self):
    # The fixture body contains a \x01 byte inside the base64 text. Decoding
    # must still produce b'vi' and record only an invalid-characters
    # defect. The payload is decoded before the defects are read because
    # the defect is registered by get_payload(decode=True).
    msg = self._str_msg(self.invalid_chars_in_base64_payload)
    decoded = msg.get_payload(decode=True)
    self.assertEqual(decoded, b'vi')
    defects = self.get_defects(msg)
    self.assertDefectsEqual(defects, [errors.InvalidBase64CharactersDefect])
274+
275+
def test_invalid_chars_in_base64_payload_raise_on_defect(self):
    # Parsing happens outside the assertRaises block: the invalid-character
    # defect is expected only when the payload is decoded, not at parse
    # time.
    strict_policy = self.policy.clone(raise_on_defect=True)
    msg = self._str_msg(self.invalid_chars_in_base64_payload,
                        policy=strict_policy)
    with self.assertRaises(errors.InvalidBase64CharactersDefect):
        msg.get_payload(decode=True)
280+
281+
282+
class TestMessageDefectDetection(TestMessageDefectDetectionBase,
                                 TestEmailBase):
    # Runs the shared defect tests, reading defects straight from the
    # `defects` attribute of the object under inspection.

    def get_defects(self, obj):
        # Hook used by the shared tests to fetch the defects for `obj`.
        return obj.defects
286+
287+
288+
class TestMessageDefectDetectionCapture(TestMessageDefectDetectionBase,
                                        TestEmailBase):
    # Runs the shared defect tests with a policy whose register_defect
    # collects every defect in a list held by the policy itself.

    class CapturePolicy(Compat32):
        # Replaced per instance by the list passed to the constructor.
        captured = None

        def register_defect(self, obj, defect):
            # `obj` is ignored: all defects land in the one shared list.
            self.captured.append(defect)

    def setUp(self):
        # Fresh policy (and fresh capture list) for every test.
        self.policy = self.CapturePolicy(captured=[])

    def get_defects(self, obj):
        # Hook used by the shared tests; returns everything the policy has
        # captured, regardless of which object is asked about.
        return self.policy.captured
301+
302+
303+
# Allow this test module to be executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)