Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 236654b

Browse files
committed
Fix some string encoding issues with entity bodies in HTTP requests.
RFC 2616 says that iso-8859-1 is the default charset for HTTP entity bodies, but we encoded strings using ascii. See http://bugs.python.org/issue5314. Changed docs and code to use iso-8859-1. Also fix some brokenness with passing a file as the body instead of a string. Add tests to show that some of this behavior actually works.
1 parent 98eb6c2 commit 236654b

3 files changed

Lines changed: 108 additions & 19 deletions

File tree

Doc/library/http.client.rst

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -351,14 +351,22 @@ HTTPConnection Objects
351351

352352
.. method:: HTTPConnection.request(method, url[, body[, headers]])
353353

354-
This will send a request to the server using the HTTP request method *method*
355-
and the selector *url*. If the *body* argument is present, it should be a
356-
string of data to send after the headers are finished. Alternatively, it may
357-
be an open file object, in which case the contents of the file is sent; this
358-
file object should support ``fileno()`` and ``read()`` methods. The header
359-
Content-Length is automatically set to the correct value. The *headers*
360-
argument should be a mapping of extra HTTP headers to send with the request.
361-
354+
This will send a request to the server using the HTTP request
355+
method *method* and the selector *url*. If the *body* argument is
356+
present, it should be a string or bytes object of data to send after
357+
the headers are finished. Strings are encoded as ISO-8859-1, the
358+
default charset for HTTP. To use other encodings, pass a bytes
359+
object. The Content-Length header is set to the length of the
360+
string.
361+
362+
The *body* may also be an open file object, in which case the
363+
contents of the file are sent; this file object should support
364+
``fileno()`` and ``read()`` methods. The header Content-Length is
365+
automatically set to the length of the file as reported by
366+
stat.
367+
368+
The *headers* argument should be a mapping of extra HTTP
369+
headers to send with the request.
362370

363371
.. method:: HTTPConnection.getresponse()
364372

Lib/http/client.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage):
243243
if line in (b'\r\n', b'\n', b''):
244244
break
245245
hstring = b''.join(headers).decode('iso-8859-1')
246-
247246
return email.parser.Parser(_class=_class).parsestr(hstring)
248247

249248
class HTTPResponse(io.RawIOBase):
@@ -675,13 +674,22 @@ def send(self, str):
675674
if self.debuglevel > 0:
676675
print("send:", repr(str))
677676
try:
678-
blocksize=8192
679-
if hasattr(str,'read') :
680-
if self.debuglevel > 0: print("sendIng a read()able")
681-
data=str.read(blocksize)
682-
while data:
677+
blocksize = 8192
678+
if hasattr(str, "read") :
679+
if self.debuglevel > 0:
680+
print("sendIng a read()able")
681+
encode = False
682+
if "b" not in str.mode:
683+
encode = True
684+
if self.debuglevel > 0:
685+
print("encoding file using iso-8859-1")
686+
while 1:
687+
data = str.read(blocksize)
688+
if not data:
689+
break
690+
if encode:
691+
data = data.encode("iso-8859-1")
683692
self.sock.sendall(data)
684-
data=str.read(blocksize)
685693
else:
686694
self.sock.sendall(str)
687695
except socket.error as v:
@@ -713,8 +721,8 @@ def _send_output(self, message_body=None):
713721
message_body = None
714722
self.send(msg)
715723
if message_body is not None:
716-
#message_body was not a string (i.e. it is a file) and
717-
#we must run the risk of Nagle
724+
# message_body was not a string (i.e. it is a file), and
725+
# we must run the risk of Nagle.
718726
self.send(message_body)
719727

720728
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@@ -904,7 +912,9 @@ def _send_request(self, method, url, body, headers):
904912
for hdr, value in headers.items():
905913
self.putheader(hdr, value)
906914
if isinstance(body, str):
907-
body = body.encode('ascii')
915+
# RFC 2616 Section 3.7.1 says that text media types have a
916+
# default charset of iso-8859-1.
917+
body = body.encode('iso-8859-1')
908918
self.endheaders(body)
909919

910920
def getresponse(self):

Lib/test/test_httplib.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,9 +272,80 @@ def test_attributes(self):
272272
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
273273
self.assertEqual(h.timeout, 30)
274274

275+
class RequestBodyTest(TestCase):
276+
"""Test cases where a request includes a message body."""
277+
278+
def setUp(self):
279+
self.conn = httplib.HTTPConnection('example.com')
280+
self.sock = FakeSocket("")
281+
self.conn.sock = self.sock
282+
283+
def get_headers_and_fp(self):
284+
f = io.BytesIO(self.sock.data)
285+
f.readline() # read the request line
286+
message = httplib.parse_headers(f)
287+
return message, f
288+
289+
def test_manual_content_length(self):
290+
# Set an incorrect content-length so that we can verify that
291+
# it will not be over-ridden by the library.
292+
self.conn.request("PUT", "/url", "body",
293+
{"Content-Length": "42"})
294+
message, f = self.get_headers_and_fp()
295+
self.assertEqual("42", message.get("content-length"))
296+
self.assertEqual(4, len(f.read()))
297+
298+
def test_ascii_body(self):
299+
self.conn.request("PUT", "/url", "body")
300+
message, f = self.get_headers_and_fp()
301+
self.assertEqual("text/plain", message.get_content_type())
302+
self.assertEqual(None, message.get_charset())
303+
self.assertEqual("4", message.get("content-length"))
304+
self.assertEqual(b'body', f.read())
305+
306+
def test_latin1_body(self):
307+
self.conn.request("PUT", "/url", "body\xc1")
308+
message, f = self.get_headers_and_fp()
309+
self.assertEqual("text/plain", message.get_content_type())
310+
self.assertEqual(None, message.get_charset())
311+
self.assertEqual("5", message.get("content-length"))
312+
self.assertEqual(b'body\xc1', f.read())
313+
314+
def test_bytes_body(self):
315+
self.conn.request("PUT", "/url", b"body\xc1")
316+
message, f = self.get_headers_and_fp()
317+
self.assertEqual("text/plain", message.get_content_type())
318+
self.assertEqual(None, message.get_charset())
319+
self.assertEqual("5", message.get("content-length"))
320+
self.assertEqual(b'body\xc1', f.read())
321+
322+
def test_file_body(self):
323+
f = open(support.TESTFN, "w")
324+
f.write("body")
325+
f.close()
326+
f = open(support.TESTFN)
327+
self.conn.request("PUT", "/url", f)
328+
message, f = self.get_headers_and_fp()
329+
self.assertEqual("text/plain", message.get_content_type())
330+
self.assertEqual(None, message.get_charset())
331+
self.assertEqual("4", message.get("content-length"))
332+
self.assertEqual(b'body', f.read())
333+
334+
def test_binary_file_body(self):
335+
f = open(support.TESTFN, "wb")
336+
f.write(b"body\xc1")
337+
f.close()
338+
f = open(support.TESTFN, "rb")
339+
self.conn.request("PUT", "/url", f)
340+
message, f = self.get_headers_and_fp()
341+
self.assertEqual("text/plain", message.get_content_type())
342+
self.assertEqual(None, message.get_charset())
343+
self.assertEqual("5", message.get("content-length"))
344+
self.assertEqual(b'body\xc1', f.read())
345+
275346
def test_main(verbose=None):
276347
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
277-
HTTPSTimeoutTest)
348+
HTTPSTimeoutTest, RequestBodyTest)
278349

279350
if __name__ == '__main__':
280351
test_main()

0 commit comments

Comments
 (0)