@@ -254,12 +254,22 @@ def decodePage(page, contentEncoding, contentType):
254254 if not page or (conf .nullConnection and len (page ) < 2 ):
255255 return getUnicode (page )
256256
257- if isinstance (contentEncoding , basestring ) and contentEncoding .lower () in ("gzip" , "x-gzip" , "deflate" ):
257+ if isinstance (contentEncoding , basestring ) and contentEncoding :
258+ contentEncoding = contentEncoding .lower ()
259+ else :
260+ contentEncoding = ""
261+
262+ if isinstance (contentType , basestring ) and contentType :
263+ contentType = contentType .lower ()
264+ else :
265+ contentType = ""
266+
267+ if contentEncoding in ("gzip" , "x-gzip" , "deflate" ):
258268 if not kb .pageCompress :
259269 return None
260270
261271 try :
262- if contentEncoding . lower () == "deflate" :
272+ if contentEncoding == "deflate" :
263273 data = StringIO .StringIO (zlib .decompress (page , - 15 )) # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
264274 else :
265275 data = gzip .GzipFile ("" , "rb" , 9 , StringIO .StringIO (page ))
@@ -284,7 +294,7 @@ def decodePage(page, contentEncoding, contentType):
284294 httpCharset , metaCharset = None , None
285295
286296 # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
287- if contentType and ( contentType .find ("charset=" ) != - 1 ) :
297+ if contentType .find ("charset=" ) != - 1 :
288298 httpCharset = checkCharEncoding (contentType .split ("charset=" )[- 1 ])
289299
290300 metaCharset = checkCharEncoding (extractRegexResult (META_CHARSET_REGEX , page ))
@@ -300,7 +310,7 @@ def decodePage(page, contentEncoding, contentType):
300310 kb .pageEncoding = conf .encoding
301311
302312 # can't do for all responses because we need to support binary files too
303- if contentType and not isinstance (page , unicode ) and "text/" in contentType . lower () :
313+ if not isinstance (page , unicode ) and "text/" in contentType :
304314 if kb .heuristicMode :
305315 kb .pageEncoding = kb .pageEncoding or checkCharEncoding (getHeuristicCharEncoding (page ))
306316 page = getUnicode (page , kb .pageEncoding )
0 commit comments