55
66import sys , os , time
77
# Public API of this module: the handler classes plus the environment reader.
__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ',
]
912
1013# Weekday and month names for HTTP date/time formatting; always English!
1114_weekdayname = ["Mon" , "Tue" , "Wed" , "Thu" , "Fri" , "Sat" , "Sun" ]
@@ -19,6 +22,74 @@ def format_date_time(timestamp):
1922 _weekdayname [wd ], day , _monthname [month ], year , hh , mm , ss
2023 )
2124
# Membership test for the CGI variables whose values are derived from the
# HTTP request (as opposed to server/process configuration).
_is_request = {
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}.__contains__


def _needs_transcode(k):
    """Return True if environment key *k* must be re-encoded by read_environ.

    A key qualifies when it is one of the request variables recognised by
    ``_is_request``, or starts with ``HTTP_`` or ``SSL_``.  Any number of
    leading ``REDIRECT_`` prefixes is ignored, so ``REDIRECT_HTTP_FOO``
    qualifies because ``HTTP_FOO`` does.
    """
    prefix = 'REDIRECT_'
    # Peel REDIRECT_ prefixes in a loop rather than recursively so that a key
    # made of many repeated prefixes cannot exhaust the recursion limit.
    while True:
        if _is_request(k) or k.startswith(('HTTP_', 'SSL_')):
            return True
        if not k.startswith(prefix):
            return False
        k = k[len(prefix):]


def read_environ():
    """Read environment, fixing HTTP variables.

    Returns a new dict built from ``os.environ``.  Values whose key satisfies
    ``_needs_transcode`` are re-encoded to bytes and then decoded as
    ISO-8859-1, so that each character of the result stands for one byte;
    all other values are copied unchanged.
    """
    enc = sys.getfilesystemencoding()
    esc = 'surrogateescape'
    try:
        ''.encode('utf-8', esc)
    except LookupError:
        esc = 'replace'

    # Both the platform and the server software are the same for every
    # variable, so work them out once instead of once per loop iteration.
    on_win32 = sys.platform == 'win32'
    software = ''
    if on_win32:
        software = os.environ.get('SERVER_SOFTWARE', '').lower()

    environ = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for k, v in os.environ.items():
        if _needs_transcode(k):

            # On win32, the os.environ is natively Unicode. Different servers
            # decode the request bytes using different encodings.
            if on_win32:

                # On IIS, the HTTP request will be decoded as UTF-8 as long
                # as the input is a valid UTF-8 sequence. Otherwise it is
                # decoded using the system code page (mbcs), with no way to
                # detect this has happened. Because UTF-8 is the more likely
                # encoding, and mbcs is inherently unreliable (an mbcs string
                # that happens to be valid UTF-8 will not be decoded as mbcs)
                # always recreate the original bytes as UTF-8.
                if software.startswith('microsoft-iis/'):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
                # direct to the Unicode environ. No modification needed.
                elif software.startswith('apache/'):
                    pass

                # Python 3's http.server.CGIHTTPRequestHandler decodes
                # using the urllib.unquote default of UTF-8, amongst other
                # issues.
                elif (
                    software.startswith('simplehttp/')
                    and 'python/3' in software
                ):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # For other servers, guess that they have written bytes to
                # the environ using stdio byte-oriented interfaces, ending up
                # with the system code page.
                else:
                    v = v.encode(enc, 'replace').decode('iso-8859-1')

            # Recover bytes from unicode environ, using surrogate escapes
            # where available (Python 3.1+).
            else:
                v = v.encode(enc, esc).decode('iso-8859-1')

        environ[k] = v
    return environ
92+
2293
2394class BaseHandler :
2495 """Manage the invocation of a WSGI application"""
@@ -36,7 +107,7 @@ class BaseHandler:
36107 # os_environ is used to supply configuration from the OS environment:
37108 # by default it's a copy of 'os.environ' as of import time, but you can
38109 # override this in e.g. your __init__ method.
39- os_environ = dict ( os . environ . items () )
110+ os_environ = read_environ ( )
40111
41112 # Collaborator classes
42113 wsgi_file_wrapper = FileWrapper # set to None to disable
@@ -431,6 +502,42 @@ class CGIHandler(BaseCGIHandler):
431502
def __init__(self):
    """Set up the handler from the process's standard streams and environ.

    Passes the binary buffers underlying sys.stdin and sys.stdout as the
    input and output streams, sys.stderr as the error stream, and the
    result of read_environ() as the environment.
    """
    BaseCGIHandler.__init__(
        self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
        read_environ(), multithread=False, multiprocess=True
    )
508+
509+
class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    This handler should be used in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    # Class-level os_environ is an empty dict; the environment actually used
    # is read in __init__.
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        """Set up the handler, removing a duplicated SCRIPT_NAME from PATH_INFO.

        The environment comes from read_environ(); the input and output
        streams are the binary buffers of sys.stdin and sys.stdout, and
        sys.stderr is the error stream.
        """
        environ = read_environ()
        path = environ.get('PATH_INFO', '')
        script = environ.get('SCRIPT_NAME', '')
        # Appending '/' to both sides makes the prefix test match only on a
        # whole path segment: '/app' is a prefix of '/app/x' and of '/app'
        # itself, but not of '/application'.
        if (path + '/').startswith(script + '/'):
            environ['PATH_INFO'] = path[len(script):]
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            environ, multithread=False, multiprocess=True
        )
0 commit comments