Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 554bcbf

Browse files
committed
#19662: add decode_data to smtpd so you can get at DATA in bytes form.
Otherwise smtpd is restricted to 7bit clean data, since even if the incoming data is actually utf-8, it will often break things to decode it before parsing the message. Patch by Maciej Szulik, with some adjustments (mostly the warning support).
1 parent 38ee9af commit 554bcbf

4 files changed

Lines changed: 185 additions & 18 deletions

File tree

Doc/library/smtpd.rst

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ SMTPServer Objects
2828

2929

3030
.. class:: SMTPServer(localaddr, remoteaddr, data_size_limit=33554432,\
31-
map=None)
31+
map=None, decode_data=True)
3232

3333
Create a new :class:`SMTPServer` object, which binds to local address
3434
*localaddr*. It will treat *remoteaddr* as an upstream SMTP relayer. It
@@ -41,6 +41,11 @@ SMTPServer Objects
4141

4242
A dictionary can be specified in *map* to avoid using a global socket map.
4343

44+
*decode_data* specifies whether the data portion of the SMTP transaction
45+
should be decoded using UTF-8. The default is ``True`` for backward
46+
compatibility reasons, but will change to ``False`` in Python 3.6. Specify
47+
the keyword value explicitly to avoid the :exc:`DeprecationWarning`.
48+
4449
.. method:: process_message(peer, mailfrom, rcpttos, data)
4550

4651
Raise :exc:`NotImplementedError` exception. Override this in subclasses to
@@ -51,6 +56,10 @@ SMTPServer Objects
5156
containing the contents of the e-mail (which should be in :rfc:`2822`
5257
format).
5358

59+
If the *decode_data* constructor keyword is set to ``True``, the *data*
60+
argument will be a unicode string. If it is set to ``False``, it
61+
will be a bytes object.
62+
5463
.. attribute:: channel_class
5564

5665
Override this in subclasses to use a custom :class:`SMTPChannel` for
@@ -59,6 +68,9 @@ SMTPServer Objects
5968
.. versionchanged:: 3.4
6069
The *map* argument was added.
6170

71+
.. versionchanged:: 3.5
72+
The *decode_data* argument was added.
73+
6274

6375
DebuggingServer Objects
6476
-----------------------
@@ -97,7 +109,7 @@ SMTPChannel Objects
97109
-------------------
98110

99111
.. class:: SMTPChannel(server, conn, addr, data_size_limit=33554432,\
100-
map=None))
112+
map=None, decode_data=True)
101113

102114
Create a new :class:`SMTPChannel` object which manages the communication
103115
between the server and a single SMTP client.
@@ -110,9 +122,17 @@ SMTPChannel Objects
110122

111123
A dictionary can be specified in *map* to avoid using a global socket map.
112124

125+
*decode_data* specifies whether the data portion of the SMTP transaction
126+
should be decoded using UTF-8. The default is ``True`` for backward
127+
compatibility reasons, but will change to ``False`` in Python 3.6. Specify
128+
the keyword value explicitly to avoid the :exc:`DeprecationWarning`.
129+
113130
To use a custom SMTPChannel implementation you need to override the
114131
:attr:`SMTPServer.channel_class` of your :class:`SMTPServer`.
115132

133+
.. versionchanged:: 3.5
134+
The *decode_data* argument was added.
135+
116136
The :class:`SMTPChannel` has the following instance variables:
117137

118138
.. attribute:: smtp_server

Doc/whatsnew/3.5.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,16 @@ signal
184184
debugging, instead of integer “magic numbers”. (contributed by Giampaolo
185185
Rodola' in :issue:`21076`)
186186

187+
smtpd
188+
-----
189+
190+
* Both :class:`~smtpd.SMTPServer` and :class:`~smtpd.SMTPChannel` now accept a
191+
*decode_data* keyword to determine if the DATA portion of the SMTP
192+
transaction is decoded using the ``utf-8`` codec or is instead provided to
193+
:meth:`~smtpd.SMTPServer.process_message` as a byte string. The default
194+
is ``True`` for backward compatibility reasons, but will change to ``False``
195+
in Python 3.6. (Contributed by Maciej Szulik in :issue:`19662`.)
196+
187197
socket
188198
------
189199

@@ -245,6 +255,12 @@ Deprecated Python modules, functions and methods
245255
* The :mod:`formatter` module has now graduated to full deprecation and is still
246256
slated for removal in Python 3.6.
247257

258+
* :mod:`smtpd` has in the past always decoded the DATA portion of email
259+
messages using the ``utf-8`` codec. This can now be controlled by the new
260+
*decode_data* keyword to :class:`~smtpd.SMTPServer`. The default value is
261+
``True``, but this default is deprecated. Specify the *decode_data* keyword
262+
with an appropriate value to avoid the deprecation warning.
263+
248264

249265
Deprecated functions and types of the C API
250266
-------------------------------------------

Lib/smtpd.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ def flush(self): pass
9898

9999
DEBUGSTREAM = Devnull()
100100
NEWLINE = '\n'
101-
EMPTYSTRING = ''
102101
COMMASPACE = ', '
103102
DATA_SIZE_DEFAULT = 33554432
104103

@@ -122,12 +121,28 @@ class SMTPChannel(asynchat.async_chat):
122121
max_command_size_limit = max(command_size_limits.values())
123122

124123
def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT,
125-
map=None):
124+
map=None, decode_data=None):
126125
asynchat.async_chat.__init__(self, conn, map=map)
127126
self.smtp_server = server
128127
self.conn = conn
129128
self.addr = addr
130129
self.data_size_limit = data_size_limit
130+
if decode_data is None:
131+
warn("The decode_data default of True will change to False in 3.6;"
132+
" specify an explicit value for this keyword",
133+
DeprecationWarning, 2)
134+
decode_data = True
135+
self._decode_data = decode_data
136+
if decode_data:
137+
self._emptystring = ''
138+
self._linesep = '\r\n'
139+
self._dotsep = '.'
140+
self._newline = NEWLINE
141+
else:
142+
self._emptystring = b''
143+
self._linesep = b'\r\n'
144+
self._dotsep = b'.'
145+
self._newline = b'\n'
131146
self.received_lines = []
132147
self.smtp_state = self.COMMAND
133148
self.seen_greeting = ''
@@ -287,11 +302,14 @@ def collect_incoming_data(self, data):
287302
return
288303
elif limit:
289304
self.num_bytes += len(data)
290-
self.received_lines.append(str(data, "utf-8"))
305+
if self._decode_data:
306+
self.received_lines.append(str(data, 'utf-8'))
307+
else:
308+
self.received_lines.append(data)
291309

292310
# Implementation of base class abstract method
293311
def found_terminator(self):
294-
line = EMPTYSTRING.join(self.received_lines)
312+
line = self._emptystring.join(self.received_lines)
295313
print('Data:', repr(line), file=DEBUGSTREAM)
296314
self.received_lines = []
297315
if self.smtp_state == self.COMMAND:
@@ -300,6 +318,8 @@ def found_terminator(self):
300318
self.push('500 Error: bad syntax')
301319
return
302320
method = None
321+
if not self._decode_data:
322+
line = str(line, 'utf-8')
303323
i = line.find(' ')
304324
if i < 0:
305325
command = line.upper()
@@ -330,12 +350,12 @@ def found_terminator(self):
330350
# Remove extraneous carriage returns and de-transparency according
331351
# to RFC 5321, Section 4.5.2.
332352
data = []
333-
for text in line.split('\r\n'):
334-
if text and text[0] == '.':
353+
for text in line.split(self._linesep):
354+
if text and text[0] == self._dotsep:
335355
data.append(text[1:])
336356
else:
337357
data.append(text)
338-
self.received_data = NEWLINE.join(data)
358+
self.received_data = self._newline.join(data)
339359
status = self.smtp_server.process_message(self.peer,
340360
self.mailfrom,
341361
self.rcpttos,
@@ -577,10 +597,17 @@ class SMTPServer(asyncore.dispatcher):
577597
channel_class = SMTPChannel
578598

579599
def __init__(self, localaddr, remoteaddr,
580-
data_size_limit=DATA_SIZE_DEFAULT, map=None):
600+
data_size_limit=DATA_SIZE_DEFAULT, map=None,
601+
decode_data=None):
581602
self._localaddr = localaddr
582603
self._remoteaddr = remoteaddr
583604
self.data_size_limit = data_size_limit
605+
if decode_data is None:
606+
warn("The decode_data default of True will change to False in 3.6;"
607+
" specify an explicit value for this keyword",
608+
DeprecationWarning, 2)
609+
decode_data = True
610+
self._decode_data = decode_data
584611
asyncore.dispatcher.__init__(self, map=map)
585612
try:
586613
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -599,7 +626,7 @@ def __init__(self, localaddr, remoteaddr,
599626
def handle_accepted(self, conn, addr):
600627
print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM)
601628
channel = self.channel_class(self, conn, addr, self.data_size_limit,
602-
self._map)
629+
self._map, self._decode_data)
603630

604631
# API for "doing something useful with the message"
605632
def process_message(self, peer, mailfrom, rcpttos, data):

Lib/test/test_smtpd.py

Lines changed: 111 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,18 @@
77

88

99
class DummyServer(smtpd.SMTPServer):
10-
def __init__(self, localaddr, remoteaddr):
11-
smtpd.SMTPServer.__init__(self, localaddr, remoteaddr)
10+
def __init__(self, localaddr, remoteaddr, decode_data=True):
11+
smtpd.SMTPServer.__init__(self, localaddr, remoteaddr,
12+
decode_data=decode_data)
1213
self.messages = []
14+
if decode_data:
15+
self.return_status = 'return status'
16+
else:
17+
self.return_status = b'return status'
1318

1419
def process_message(self, peer, mailfrom, rcpttos, data):
1520
self.messages.append((peer, mailfrom, rcpttos, data))
16-
if data == 'return status':
21+
if data == self.return_status:
1722
return '250 Okish'
1823

1924

@@ -31,9 +36,9 @@ def setUp(self):
3136
smtpd.socket = asyncore.socket = mock_socket
3237

3338
def test_process_message_unimplemented(self):
34-
server = smtpd.SMTPServer('a', 'b')
39+
server = smtpd.SMTPServer('a', 'b', decode_data=True)
3540
conn, addr = server.accept()
36-
channel = smtpd.SMTPChannel(server, conn, addr)
41+
channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True)
3742

3843
def write_line(line):
3944
channel.socket.queue_recv(line)
@@ -45,6 +50,10 @@ def write_line(line):
4550
write_line(b'DATA')
4651
self.assertRaises(NotImplementedError, write_line, b'spam\r\n.\r\n')
4752

53+
def test_decode_data_default_warns(self):
54+
with self.assertWarns(DeprecationWarning):
55+
smtpd.SMTPServer('a', 'b')
56+
4857
def tearDown(self):
4958
asyncore.close_all()
5059
asyncore.socket = smtpd.socket = socket
@@ -57,7 +66,8 @@ def setUp(self):
5766
self.debug = smtpd.DEBUGSTREAM = io.StringIO()
5867
self.server = DummyServer('a', 'b')
5968
conn, addr = self.server.accept()
60-
self.channel = smtpd.SMTPChannel(self.server, conn, addr)
69+
self.channel = smtpd.SMTPChannel(self.server, conn, addr,
70+
decode_data=True)
6171

6272
def tearDown(self):
6373
asyncore.close_all()
@@ -502,6 +512,12 @@ def test_attribute_deprecations(self):
502512
with support.check_warnings(('', DeprecationWarning)):
503513
self.channel._SMTPChannel__addr = 'spam'
504514

515+
def test_decode_data_default_warning(self):
516+
server = DummyServer('a', 'b')
517+
conn, addr = self.server.accept()
518+
with self.assertWarns(DeprecationWarning):
519+
smtpd.SMTPChannel(server, conn, addr)
520+
505521

506522
class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase):
507523

@@ -512,7 +528,8 @@ def setUp(self):
512528
self.server = DummyServer('a', 'b')
513529
conn, addr = self.server.accept()
514530
# Set DATA size limit to 32 bytes for easy testing
515-
self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32)
531+
self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32,
532+
decode_data=True)
516533

517534
def tearDown(self):
518535
asyncore.close_all()
@@ -553,5 +570,92 @@ def test_data_limit_dialog_too_much_data(self):
553570
b'552 Error: Too much mail data\r\n')
554571

555572

573+
class SMTPDChannelWithDecodeDataFalse(unittest.TestCase):
574+
575+
def setUp(self):
576+
smtpd.socket = asyncore.socket = mock_socket
577+
self.old_debugstream = smtpd.DEBUGSTREAM
578+
self.debug = smtpd.DEBUGSTREAM = io.StringIO()
579+
self.server = DummyServer('a', 'b', decode_data=False)
580+
conn, addr = self.server.accept()
581+
# Set decode_data to False
582+
self.channel = smtpd.SMTPChannel(self.server, conn, addr,
583+
decode_data=False)
584+
585+
def tearDown(self):
586+
asyncore.close_all()
587+
asyncore.socket = smtpd.socket = socket
588+
smtpd.DEBUGSTREAM = self.old_debugstream
589+
590+
def write_line(self, line):
591+
self.channel.socket.queue_recv(line)
592+
self.channel.handle_read()
593+
594+
def test_ascii_data(self):
595+
self.write_line(b'HELO example')
596+
self.write_line(b'MAIL From:eggs@example')
597+
self.write_line(b'RCPT To:spam@example')
598+
self.write_line(b'DATA')
599+
self.write_line(b'plain ascii text')
600+
self.write_line(b'.')
601+
self.assertEqual(self.channel.received_data, b'plain ascii text')
602+
603+
def test_utf8_data(self):
604+
self.write_line(b'HELO example')
605+
self.write_line(b'MAIL From:eggs@example')
606+
self.write_line(b'RCPT To:spam@example')
607+
self.write_line(b'DATA')
608+
self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87')
609+
self.write_line(b'and some plain ascii')
610+
self.write_line(b'.')
611+
self.assertEqual(
612+
self.channel.received_data,
613+
b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87\n'
614+
b'and some plain ascii')
615+
616+
617+
class SMTPDChannelWithDecodeDataTrue(unittest.TestCase):
618+
619+
def setUp(self):
620+
smtpd.socket = asyncore.socket = mock_socket
621+
self.old_debugstream = smtpd.DEBUGSTREAM
622+
self.debug = smtpd.DEBUGSTREAM = io.StringIO()
623+
self.server = DummyServer('a', 'b')
624+
conn, addr = self.server.accept()
625+
# Set decode_data to True
626+
self.channel = smtpd.SMTPChannel(self.server, conn, addr,
627+
decode_data=True)
628+
629+
def tearDown(self):
630+
asyncore.close_all()
631+
asyncore.socket = smtpd.socket = socket
632+
smtpd.DEBUGSTREAM = self.old_debugstream
633+
634+
def write_line(self, line):
635+
self.channel.socket.queue_recv(line)
636+
self.channel.handle_read()
637+
638+
def test_ascii_data(self):
639+
self.write_line(b'HELO example')
640+
self.write_line(b'MAIL From:eggs@example')
641+
self.write_line(b'RCPT To:spam@example')
642+
self.write_line(b'DATA')
643+
self.write_line(b'plain ascii text')
644+
self.write_line(b'.')
645+
self.assertEqual(self.channel.received_data, 'plain ascii text')
646+
647+
def test_utf8_data(self):
648+
self.write_line(b'HELO example')
649+
self.write_line(b'MAIL From:eggs@example')
650+
self.write_line(b'RCPT To:spam@example')
651+
self.write_line(b'DATA')
652+
self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87')
653+
self.write_line(b'and some plain ascii')
654+
self.write_line(b'.')
655+
self.assertEqual(
656+
self.channel.received_data,
657+
'utf8 enriched text: żźć\nand some plain ascii')
658+
659+
556660
if __name__ == "__main__":
557661
unittest.main()

0 commit comments

Comments
 (0)