1111Handlers needed to open the requested URL. For example, the
1212HTTPHandler performs HTTP GET and POST requests and deals with
1313non-error returns. The HTTPRedirectHandler automatically deals with
14- HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15- with digest authentication.
14+ HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15+ deals with digest authentication.
1616
1717urlopen(url, data=None) -- basic usage is the same as original
1818urllib. pass the url and optionally data to post to an HTTP URL, and
@@ -207,6 +207,12 @@ def __getattr__(self, attr):
207207 return getattr (self , attr )
208208 raise AttributeError , attr
209209
def get_method(self):
    """Return the HTTP method name for this request.

    The result is "POST" when has_data() reports true, and "GET"
    otherwise.
    """
    if not self.has_data():
        return "GET"
    return "POST"
215+
def add_data(self, data):
    """Store *data* on this request as its ``data`` attribute."""
    self.data = data
212218
@@ -402,6 +408,26 @@ def http_error_default(self, req, fp, code, msg, hdrs):
402408 raise HTTPError (req .get_full_url (), code , msg , hdrs , fp )
403409
404410class HTTPRedirectHandler (BaseHandler ):
def redirect_request(self, req, fp, code, msg, headers, newurl=None):
    """Return a Request or None in response to a redirect.

    This is called by the http_error_30x methods when a redirection
    response is received.  If a redirection should take place, return a
    new Request to allow http_error_30x to perform the redirect.
    Otherwise, raise HTTPError if no-one else should try to handle this
    url.  Return None if you can't but another Handler might.

    Arguments:
    req     -- the Request that produced the redirect response
    fp      -- file-like object for the response body (passed on to
               HTTPError)
    code    -- HTTP status code of the response
    msg     -- HTTP reason phrase of the response
    headers -- response headers; must support .get(name)
    newurl  -- optional redirect target.  When omitted, it is taken from
               the "location" (or, failing that, "uri") response header
               and resolved against the URL of req.

    The returned Request carries the original request's headers but no
    data, so a redirected POST is re-issued without its body.
    """
    method = req.get_method()
    follow = ((code in (301, 302, 303, 307) and method in ("GET", "HEAD"))
              or (code in (302, 303) and method == "POST"))
    if not follow:
        raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Strictly (according to RFC 2616), 302 in response to a POST
    # MUST NOT cause a redirection without confirmation from the user
    # (of urllib2, in this case).  In practice, essentially all clients
    # do redirect in this case, so we do the same.
    if newurl is None:
        newurl = headers.get("location") or headers.get("uri")
        if not newurl:
            # No target was supplied and none can be found in the
            # response; let another handler deal with it.
            return None
        # The header may hold a relative reference; make it absolute.
        import urlparse
        newurl = urlparse.urljoin(req.get_full_url(), newurl)
    return Request(newurl, headers=req.headers)
430+
405431 # Implementation note: To avoid the server sending us into an
406432 # infinite loop, the request object needs to track what URLs we
407433 # have already seen. Do this by adding a handler-specific
@@ -418,7 +444,11 @@ def http_error_302(self, req, fp, code, msg, headers):
418444 # XXX Probably want to forget about the state of the current
419445 # request, although that might interact poorly with other
420446 # handlers that also use handler-specific request attributes
421- new = Request (newurl , req .get_data (), req .headers )
447+ new = self .redirect_request (req , fp , code , msg , headers )
448+ if new is None :
449+ return
450+
451+ # loop detection
422452 new .error_302_dict = {}
423453 if hasattr (req , 'error_302_dict' ):
424454 if len (req .error_302_dict )> 10 or \
@@ -435,7 +465,7 @@ def http_error_302(self, req, fp, code, msg, headers):
435465
436466 return self .parent .open (new )
437467
438- http_error_301 = http_error_302
468+ http_error_301 = http_error_303 = http_error_307 = http_error_302
439469
440470 inf_msg = "The HTTP server returned a redirect error that would" \
441471 "lead to an infinite loop.\n " \
0 commit comments