Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 62980d7

Browse files
committed
Automatically decoding URL-encoded data in response
1 parent 9e49d8c commit 62980d7

1 file changed

Lines changed: 9 additions & 5 deletions

File tree

lib/request/basic.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -227,23 +227,27 @@ def decodePage(page, contentEncoding, contentType):
227227
kb.pageEncoding = conf.charset
228228

229229
# can't do for all responses because we need to support binary files too
230-
if contentType and not isinstance(page, unicode) and any(map(lambda _: _ in contentType.lower(), ("text/txt", "text/raw", "text/html", "text/xml"))):
230+
if contentType and not isinstance(page, unicode) and "text/" in contentType.lower():
231231
# e.g. &#195;&#235;&#224;&#226;&#224;
232232
if "&#" in page:
233-
page = re.sub('&#(\d{1,3});', lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
233+
page = re.sub(r"&#(\d{1,3});", lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
234+
235+
# e.g. %20%28%29
236+
if "%" in page:
237+
page = re.sub(r"%([0-9a-fA-F]{2})", lambda _: _.group(1).decode("hex"), page)
234238

235239
# e.g. &amp;
236-
page = re.sub('&([^;]+);', lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)
240+
page = re.sub(r"&([^;]+);", lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)
237241

238242
kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
239243
page = getUnicode(page, kb.pageEncoding)
240244

241245
# e.g. &#8217;&#8230;&#8482;
242246
if "&#" in page:
243-
page = re.sub('&#(\d+);', lambda _: unichr(int(_.group(1))), page)
247+
page = re.sub(r"&#(\d+);", lambda _: unichr(int(_.group(1))), page)
244248

245249
# e.g. &zeta;
246-
page = re.sub('&([^;]+);', lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)
250+
page = re.sub(r"&([^;]+);", lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)
247251

248252
return page
249253

0 commit comments

Comments
 (0)