Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d46aa37

Browse files
committed
Handle HTTP/0.9 responses.
Section 19.6 of RFC 2616 (HTTP/1.1): It is beyond the scope of a protocol specification to mandate compliance with previous versions. HTTP/1.1 was deliberately designed, however, to make supporting previous versions easy.... And we would expect HTTP/1.1 clients to: - recognize the format of the Status-Line for HTTP/1.0 and 1.1 responses; - understand any valid response in the format of HTTP/0.9, 1.0, or 1.1. The changes to the code do handle responses in the format of HTTP/0.9. Some users may consider this a bug because all responses with a sufficiently corrupted status line will look like an HTTP/0.9 response. These users can pass strict=1 to the HTTP constructors to get a BadStatusLine exception instead. While this is a new feature of sorts, it enhances the robustness of the code (be tolerant in what you accept). Thus, I consider it a bug fix candidate. XXX strict needs to be documented.
1 parent 889f8bf commit d46aa37

1 file changed

Lines changed: 132 additions & 19 deletions

File tree

Lib/httplib.py

Lines changed: 132 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,19 @@
9595

9696

9797
class HTTPResponse:
98-
def __init__(self, sock, debuglevel=0):
98+
99+
# strict: If true, raise BadStatusLine if the status line can't be
100+
# parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
101+
# false because it prevents clients from talking to HTTP/0.9
102+
# servers. Note that a response with a sufficiently corrupted
103+
# status line will look like an HTTP/0.9 response.
104+
105+
# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
106+
107+
def __init__(self, sock, debuglevel=0, strict=0):
99108
self.fp = sock.makefile('rb', 0)
100109
self.debuglevel = debuglevel
110+
self.strict = strict
101111

102112
self.msg = None
103113

@@ -112,6 +122,7 @@ def __init__(self, sock, debuglevel=0):
112122
self.will_close = _UNKNOWN # conn will close at end of response
113123

114124
def _read_status(self):
125+
# Initialize with Simple-Response defaults
115126
line = self.fp.readline()
116127
if self.debuglevel > 0:
117128
print "reply:", repr(line)
@@ -122,12 +133,17 @@ def _read_status(self):
122133
[version, status] = line.split(None, 1)
123134
reason = ""
124135
except ValueError:
125-
version = "HTTP/0.9"
126-
status = "200"
127-
reason = ""
128-
if version[:5] != 'HTTP/':
129-
self.close()
130-
raise BadStatusLine(line)
136+
# empty version will cause next test to fail and status
137+
# will be treated as 0.9 response.
138+
version = ""
139+
if not version.startswith('HTTP/'):
140+
if self.strict:
141+
self.close()
142+
raise BadStatusLine(line)
143+
else:
144+
# assume it's a Simple-Response from an 0.9 server
145+
self.fp = LineAndFileWrapper(line, self.fp)
146+
return "HTTP/0.9", 200, ""
131147

132148
# The status code is a three-digit number
133149
try:
@@ -169,6 +185,7 @@ def _begin(self):
169185

170186
if self.version == 9:
171187
self.chunked = 0
188+
self.will_close = 1
172189
self.msg = mimetools.Message(StringIO())
173190
return
174191

@@ -353,13 +370,16 @@ class HTTPConnection:
353370
default_port = HTTP_PORT
354371
auto_open = 1
355372
debuglevel = 0
373+
strict = 0
356374

357-
def __init__(self, host, port=None):
375+
def __init__(self, host, port=None, strict=None):
358376
self.sock = None
359377
self.__response = None
360378
self.__state = _CS_IDLE
361-
379+
362380
self._set_hostport(host, port)
381+
if strict is not None:
382+
self.strict = strict
363383

364384
def _set_hostport(self, host, port):
365385
if port is None:
@@ -610,9 +630,10 @@ def getresponse(self):
610630
raise ResponseNotReady()
611631

612632
if self.debuglevel > 0:
613-
response = self.response_class(self.sock, self.debuglevel)
633+
response = self.response_class(self.sock, self.debuglevel,
634+
strict=self.strict)
614635
else:
615-
response = self.response_class(self.sock)
636+
response = self.response_class(self.sock, strict=self.strict)
616637

617638
response._begin()
618639
assert response.will_close != _UNKNOWN
@@ -733,8 +754,9 @@ class HTTPSConnection(HTTPConnection):
733754

734755
default_port = HTTPS_PORT
735756

736-
def __init__(self, host, port=None, key_file=None, cert_file=None):
737-
HTTPConnection.__init__(self, host, port)
757+
def __init__(self, host, port=None, key_file=None, cert_file=None,
758+
strict=None):
759+
HTTPConnection.__init__(self, host, port, strict)
738760
self.key_file = key_file
739761
self.cert_file = cert_file
740762

@@ -760,7 +782,7 @@ class HTTP:
760782

761783
_connection_class = HTTPConnection
762784

763-
def __init__(self, host='', port=None):
785+
def __init__(self, host='', port=None, strict=None):
764786
"Provide a default host, since the superclass requires one."
765787

766788
# some joker passed 0 explicitly, meaning default port
@@ -770,7 +792,7 @@ def __init__(self, host='', port=None):
770792
# Note that we may pass an empty string as the host; this will throw
771793
# an error when we attempt to connect. Presumably, the client code
772794
# will call connect before then, with a proper host.
773-
self._setup(self._connection_class(host, port))
795+
self._setup(self._connection_class(host, port, strict))
774796

775797
def _setup(self, conn):
776798
self._conn = conn
@@ -850,18 +872,20 @@ class HTTPS(HTTP):
850872

851873
_connection_class = HTTPSConnection
852874

853-
def __init__(self, host='', port=None, **x509):
875+
def __init__(self, host='', port=None, key_file=None, cert_file=None,
876+
strict=None):
854877
# provide a default host, pass the X509 cert info
855878

856879
# urf. compensate for bad input.
857880
if port == 0:
858881
port = None
859-
self._setup(self._connection_class(host, port, **x509))
882+
self._setup(self._connection_class(host, port, key_file,
883+
cert_file, strict))
860884

861885
# we never actually use these for anything, but we keep them
862886
# here for compatibility with post-1.5.2 CVS.
863-
self.key_file = x509.get('key_file')
864-
self.cert_file = x509.get('cert_file')
887+
self.key_file = key_file
888+
self.cert_file = cert_file
865889

866890

867891
class HTTPException(Exception):
@@ -906,6 +930,65 @@ def __init__(self, line):
906930
# for backwards compatibility
907931
error = HTTPException
908932

933+
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The wrapper serves the already-read first line, then transparently
    continues with the underlying file object.  Any attribute that is
    not defined here is looked up on the underlying file.
    """

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Wrap `file`, serving `line` before any of the file's data."""
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)
        if not line:
            # Nothing is buffered (e.g. the peer closed the connection
            # without sending anything), so delegate right away instead
            # of tripping over an empty buffer on the first read.
            self._done()

    def __getattr__(self, attr):
        # Only called for attributes not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt` bytes; None or a negative value reads it all."""
        if self._line_consumed:
            # Reached through a bound method that was captured before
            # _done() rebound the instance attributes.
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        if amt is None or amt < 0:
            s = self._line[self._line_offset:]
            self._done()
            return s + self._file.read()
        if amt > self._line_left:
            # The request spans the buffered line and the file.
            s = self._line[self._line_offset:]
            self._done()
            return s + self._file.read(amt - len(s))
        # The request is satisfied entirely from the buffered line.
        i = self._line_offset
        j = i + amt
        s = self._line[i:j]
        self._line_offset = j
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return s

    def readline(self):
        """Return the rest of the buffered line, then lines from the file."""
        if self._line_consumed:
            return self._file.readline()
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line plus the file's lines.

        `size`, when given, is passed to the underlying file's
        readlines() unchanged; the buffered line is not counted.
        """
        if self._line_consumed:
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
909992

910993
#
911994
# snarfed from httplib.py for now...
@@ -971,5 +1054,35 @@ class HTTP11(HTTP):
9711054
print "read", len(hs.getfile().read())
9721055

9731056

1057+
# Test a buggy server -- returns garbled status line.
1058+
# http://www.yahoo.com/promotions/mom_com97/supermom.html
1059+
c = HTTPConnection("promotions.yahoo.com")
1060+
c.set_debuglevel(1)
1061+
c.connect()
1062+
c.request("GET", "/promotions/mom_com97/supermom.html")
1063+
r = c.getresponse()
1064+
print r.status, r.version
1065+
lines = r.read().split("\n")
1066+
print "\n".join(lines[:5])
1067+
1068+
c = HTTPConnection("promotions.yahoo.com", strict=1)
1069+
c.set_debuglevel(1)
1070+
c.connect()
1071+
c.request("GET", "/promotions/mom_com97/supermom.html")
1072+
try:
1073+
r = c.getresponse()
1074+
except BadStatusLine, err:
1075+
print "strict mode failed as expected"
1076+
else:
1077+
print "XXX strict mode should have failed"
1078+
1079+
for strict in 0, 1:
1080+
h = HTTP(strict=strict)
1081+
h.connect("promotions.yahoo.com")
1082+
h.putrequest('GET', "/promotions/mom_com97/supermom.html")
1083+
h.endheaders()
1084+
status, reason, headers = h.getreply()
1085+
assert (strict and status == -1) or status == 200, (strict, status)
1086+
9741087
if __name__ == '__main__':
9751088
test()

0 commit comments

Comments
 (0)