Commit 5d9c303
Fix urllib2.urlopen() handling of chunked content encoding.

The change to use the newer httplib interface admitted the possibility
that we'd get an HTTP/1.1 chunked response, but the code didn't handle
it correctly. The raw socket object can't be passed to addinfourl(),
because it would read the undecoded response. Instead, addinfourl()
must call HTTPResponse.read(), which will handle the decoding.

One extra wrinkle is that the HTTPResponse object can't be passed to
addinfourl() either, because it doesn't implement readline() or
readlines(). As a quick hack, use socket._fileobject(), which
implements those methods on top of a read buffer. (Suggested by mwh.)

Finally, add some tests based on test_urllibnet. Thanks to Andrew
Sawyers for originally reporting the chunked problem.
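A minimal sketch of the wrapping trick described above, assuming the
Python 2.3-era stdlib; the helper name is hypothetical, and the patch
inlines the same two lines in AbstractHTTPHandler.do_open():

    import socket

    def wrap_decoded_response(r):
        # r is an httplib.HTTPResponse.  Its read() method decodes any
        # chunked transfer encoding; the raw socket's recv() would hand
        # back the still-encoded bytes.
        r.recv = r.read                # socket._fileobject reads via recv()
        return socket._fileobject(r)   # supplies readline()/readlines()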
Parent: 1baa248

3 files changed: 108 additions & 2 deletions

Lib/test/test_urllib2.py
Lines changed: 2 additions & 0 deletions

@@ -423,6 +423,8 @@ def __init__(self, fp, msg, status, reason):
         self.msg = msg
         self.status = status
         self.reason = reason
+    def read(self):
+        return ''
 class MockHTTPClass:
     def __init__(self):
         self.req_headers = []
Lib/test/test_urllib2net.py (new file)
Lines changed: 92 additions & 0 deletions

#!/usr/bin/env python

import unittest
from test import test_support

import socket
import urllib2
import sys
import os
import mimetools

class URLTimeoutTest(unittest.TestCase):

    TIMEOUT = 10.0

    def setUp(self):
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(None)

    def testURLread(self):
        f = urllib2.urlopen("http://www.python.org/")
        x = f.read()

class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib2.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    def test_basic(self):
        # Simple test expected to pass.
        open_url = urllib2.urlopen("http://www.python.org/")
        for attr in ("read", "close", "info", "geturl"):
            self.assert_(hasattr(open_url, attr), "object returned from "
                         "urlopen lacks the %s attribute" % attr)
        try:
            self.assert_(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = urllib2.urlopen("http://www.python.org/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        self.assert_(isinstance(info_obj, mimetools.Message),
                     "object returned by 'info' is not an instance of "
                     "mimetools.Message")
        self.assertEqual(info_obj.getsubtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        open_url = urllib2.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        self.assertRaises(IOError,
                          # SF patch 809915:  In Sep 2003, VeriSign started
                          # highjacking invalid .com and .net addresses to
                          # boost traffic to their own site.  This test
                          # started failing then.  One hopes the .invalid
                          # domain will be spared to serve its defined
                          # purpose.
                          # urllib2.urlopen, "http://www.sadflkjsasadf.com/")
                          urllib2.urlopen, "http://www.python.invalid/")

def test_main():
    test_support.requires("network")
    test_support.run_unittest(URLTimeoutTest, urlopenNetworkTests)

if __name__ == "__main__":
    test_main()
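These tests are guarded by test_support.requires("network"), so under
regrtest they run only when the network resource is enabled (regrtest's
-u network switch). A hedged sketch of enabling the resource
programmatically via the use_resources hook that regrtest normally sets:

    from test import test_support
    test_support.use_resources = ["network"]  # what regrtest -u network arranges

    from test import test_urllib2net
    test_urllib2net.test_main()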

Lib/urllib2.py
Lines changed: 14 additions & 2 deletions

@@ -997,8 +997,20 @@ def do_open(self, http_class, req):
             raise URLError(err)
 
         # Pick apart the HTTPResponse object to get the addinfourl
-        # object initialized properly
-        resp = addinfourl(r.fp, r.msg, req.get_full_url())
+        # object initialized properly.
+
+        # Wrap the HTTPResponse object in socket's file object adapter
+        # for Windows.  That adapter calls recv(), so delegate recv()
+        # to read().  This weird wrapping allows the returned object to
+        # have readline() and readlines() methods.
+
+        # XXX It might be better to extract the read buffering code
+        # out of socket._fileobject() and into a base class.
+
+        r.recv = r.read
+        fp = socket._fileobject(r)
+
+        resp = addinfourl(fp, r.msg, req.get_full_url())
         resp.code = r.status
         resp.msg = r.reason
         return resp
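A hedged usage sketch of what the fix buys callers: the object
urlopen() returns now offers buffered readline() and readlines() even
for a chunked HTTP/1.1 response, since addinfourl picks those methods
up from the socket._fileobject wrapper (the URL is just an example;
whether the response is actually chunked is up to the server):

    import urllib2

    f = urllib2.urlopen("http://www.python.org/")
    print f.readline()        # first decoded line, buffered by _fileobject
    rest = f.readlines()      # the remainder of the decoded body
    f.close()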
