Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 024aaa1

Browse files
committed
SF Patch 549151: urllib2 POSTs on redirect
(contributed by John J Lee)
1 parent 1d5854f commit 024aaa1

4 files changed

Lines changed: 85 additions & 15 deletions

File tree

Doc/lib/liburllib.tex

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -254,12 +254,18 @@ \section{\module{urllib} ---
254254

255255
\begin{classdesc}{FancyURLopener}{...}
256256
\class{FancyURLopener} subclasses \class{URLopener} providing default
257-
handling for the following HTTP response codes: 301, 302 or 401. For
258-
301 and 302 response codes, the \mailheader{Location} header is used to
259-
fetch the actual URL. For 401 response codes (authentication
260-
required), basic HTTP authentication is performed. For 301 and 302 response
261-
codes, recursion is bounded by the value of the \var{maxtries} attribute,
262-
which defaults 10.
257+
handling for the following HTTP response codes: 301, 302, 303 and 401.
258+
For 301, 302 and 303 response codes, the \mailheader{Location} header
259+
is used to fetch the actual URL. For 401 response codes
260+
(authentication required), basic HTTP authentication is performed.
261+
For 301, 302 and 303 response codes, recursion is bounded by the value
262+
of the \var{maxtries} attribute, which defaults to 10.
263+
264+
\note{According to the letter of \rfc{2616}, 301 and 302 responses to
265+
POST requests must not be automatically redirected without
266+
confirmation by the user. In reality, browsers do allow automatic
267+
redirection of these responses, changing the POST to a GET, and
268+
\module{urllib} reproduces this behaviour.}
263269

264270
The parameters to the constructor are the same as those for
265271
\class{URLopener}.

Doc/lib/liburllib2.tex

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,12 @@ \subsection{Request Objects \label{request-objects}}
217217
request to be \code{POST} rather than \code{GET}.
218218
\end{methoddesc}
219219

220+
\begin{methoddesc}[Request]{get_method}{}
221+
Return a string indicating the HTTP request method. This is only
222+
meaningful for HTTP requests, and currently always takes one of the
223+
values ("GET", "POST").
224+
\end{methoddesc}
225+
220226
\begin{methoddesc}[Request]{has_data}{}
221227
Return whether the instance has a non-\code{None} data.
222228
\end{methoddesc}
@@ -394,25 +400,49 @@ \subsection{BaseHandler Objects \label{base-handler-objects}}
394400
for \method{http_error_default()}.
395401
\end{methoddesc}
396402

397-
398403
\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
399404

400-
\note{303 redirection is not supported by this version of
401-
\module{urllib2}.}
405+
\note{Some HTTP redirections require action from this module's client
406+
code. If this is the case, \exception{HTTPError} is raised. See
407+
\rfc{2616} for details of the precise meanings of the various
408+
redirection codes.}
409+
410+
\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
411+
fp, code, msg, hdrs}
412+
Return a \class{Request} or \code{None} in response to a redirect.
413+
This is called by the default implementations of the
414+
\code{http_error_30x()} methods when a redirection is received from
415+
the server. If a redirection should take place, return a new
416+
\class{Request} to allow \code{http_error_30x()} to perform the
417+
redirect. Otherwise, raise \exception{HTTPError} if no other
418+
\class{Handler} should try to handle this URL, or return \code{None}
419+
if you can't but another \class{Handler} might.
420+
421+
\note{The default implementation of this method does not strictly
422+
follow \rfc{2616}: it allows automatic 302 redirection of POST
423+
requests, because essentially all HTTP clients do this.}
424+
425+
\end{methoddesc}
426+
402427

403428
\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
404429
fp, code, msg, hdrs}
405430
Redirect to the \code{Location:} URL. This method is called by
406431
the parent \class{OpenerDirector} when getting an HTTP
407-
permanent-redirect response.
432+
`moved permanently' response.
408433
\end{methoddesc}
409434

410435
\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
411436
fp, code, msg, hdrs}
412437
The same as \method{http_error_301()}, but called for the
413-
temporary-redirect response.
438+
`found' response.
414439
\end{methoddesc}
415440

441+
\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
442+
fp, code, msg, hdrs}
443+
The same as \method{http_error_301()}, but called for the
444+
`see other' redirect response.
445+
\end{methoddesc}
416446

417447
\subsection{ProxyHandler Objects \label{proxy-handler}}
418448

Lib/urllib.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,10 @@ def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
586586
"""Error 301 -- also relocated (permanently)."""
587587
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
588588

589+
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
590+
"""Error 303 -- also relocated (essentially identical to 302)."""
591+
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
592+
589593
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
590594
"""Error 401 -- authentication required.
591595
See this URL for a description of the basic authentication scheme:

Lib/urllib2.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
Handlers needed to open the requested URL. For example, the
1212
HTTPHandler performs HTTP GET and POST requests and deals with
1313
non-error returns. The HTTPRedirectHandler automatically deals with
14-
HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15-
with digest authentication.
14+
HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15+
deals with digest authentication.
1616
1717
urlopen(url, data=None) -- basic usage is the same as original
1818
urllib. Pass the url and optionally data to post to an HTTP URL, and
@@ -207,6 +207,12 @@ def __getattr__(self, attr):
207207
return getattr(self, attr)
208208
raise AttributeError, attr
209209

210+
def get_method(self):
211+
if self.has_data():
212+
return "POST"
213+
else:
214+
return "GET"
215+
210216
def add_data(self, data):
211217
self.data = data
212218

@@ -402,6 +408,26 @@ def http_error_default(self, req, fp, code, msg, hdrs):
402408
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
403409

404410
class HTTPRedirectHandler(BaseHandler):
411+
def redirect_request(self, req, fp, code, msg, headers):
412+
"""Return a Request or None in response to a redirect.
413+
414+
This is called by the http_error_30x methods when a redirection
415+
response is received. If a redirection should take place, return a new
416+
Request to allow http_error_30x to perform the redirect. Otherwise,
417+
raise HTTPError if no-one else should try to handle this url. Return
418+
None if you can't but another Handler might.
419+
420+
"""
421+
if (code in (301, 302, 303, 307) and req.method() in ("GET", "HEAD") or
422+
code in (302, 303) and req.method() == "POST"):
423+
# Strictly (according to RFC 2616), 302 in response to a POST
424+
# MUST NOT cause a redirection without confirmation from the user
425+
# (of urllib2, in this case). In practice, essentially all clients
426+
# do redirect in this case, so we do the same.
427+
return Request(newurl, headers=req.headers)
428+
else:
429+
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
430+
405431
# Implementation note: To avoid the server sending us into an
406432
# infinite loop, the request object needs to track what URLs we
407433
# have already seen. Do this by adding a handler-specific
@@ -418,7 +444,11 @@ def http_error_302(self, req, fp, code, msg, headers):
418444
# XXX Probably want to forget about the state of the current
419445
# request, although that might interact poorly with other
420446
# handlers that also use handler-specific request attributes
421-
new = Request(newurl, req.get_data(), req.headers)
447+
new = self.redirect_request(req, fp, code, msg, headers)
448+
if new is None:
449+
return
450+
451+
# loop detection
422452
new.error_302_dict = {}
423453
if hasattr(req, 'error_302_dict'):
424454
if len(req.error_302_dict)>10 or \
@@ -435,7 +465,7 @@ def http_error_302(self, req, fp, code, msg, headers):
435465

436466
return self.parent.open(new)
437467

438-
http_error_301 = http_error_302
468+
http_error_301 = http_error_303 = http_error_307 = http_error_302
439469

440470
inf_msg = "The HTTP server returned a redirect error that would" \
441471
"lead to an infinite loop.\n" \

0 commit comments

Comments
 (0)