Bug fix (disable HTML decoding in XSS checks)

stamparm · stamparm · commit 0977f6df61b4 · 2019-11-18T11:40:07.000+01:00
diff --git a/lib/controller/checks.py b/lib/controller/checks.py
@@ -1104,6 +1104,7 @@ def _(page):
         logger.warn(infoMsg)
 
     kb.heuristicMode = True
+    kb.disableHtmlDecoding = True
 
     randStr1, randStr2 = randomStr(NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH), randomStr(NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH)
     value = "%s%s%s" % (randStr1, DUMMY_NON_SQLI_CHECK_APPENDIX, randStr2)
@@ -1123,6 +1124,7 @@ def _(page):
             logger.info(infoMsg)
             break
 
+    kb.disableHtmlDecoding = False
     kb.heuristicMode = False
 
     return kb.heuristicTest
diff --git a/lib/core/option.py b/lib/core/option.py
@@ -1872,6 +1872,7 @@ def _setKnowledgeBaseAttributes(flushAll=True):
 
     kb.delayCandidates = TIME_DELAY_CANDIDATES * [0]
     kb.dep = None
+    kb.disableHtmlDecoding = False
     kb.dnsMode = False
     kb.dnsTest = None
     kb.docRoot = None
diff --git a/lib/core/settings.py b/lib/core/settings.py
@@ -18,7 +18,7 @@
 from thirdparty.six import unichr as _unichr
 
 # sqlmap version (<major>.<minor>.<month>.<monthly commit>)
-VERSION = "1.3.11.75"
+VERSION = "1.3.11.76"
 TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
 TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
 VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
diff --git a/lib/request/basic.py b/lib/request/basic.py
@@ -334,41 +334,44 @@ def decodePage(page, contentEncoding, contentType, percentDecode=True):
 
     # can't do for all responses because we need to support binary files too
     if isinstance(page, six.binary_type) and "text/" in contentType:
-        # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
-        if b"&#" in page:
-            page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)), page)
-            page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
-
-        # e.g. %20%28%29
-        if percentDecode:
-            if b"%" in page:
-                page = re.sub(b"%([0-9a-fA-F]{2})", lambda _: decodeHex(_.group(1)), page)
-
-        # e.g. &amp;
-        page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)
-
-        kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
-
-        if (kb.pageEncoding or "").lower() == "utf-8-sig":
-            kb.pageEncoding = "utf-8"
-            if page and page.startswith("\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
-                page = page[3:]
-
-        page = getUnicode(page, kb.pageEncoding)
-
-        # e.g. &#8217;&#8230;&#8482;
-        if "&#" in page:
-            def _(match):
-                retVal = match.group(0)
-                try:
-                    retVal = _unichr(int(match.group(1)))
-                except (ValueError, OverflowError):
-                    pass
-                return retVal
-            page = re.sub(r"&#(\d+);", _, page)
-
-        # e.g. &zeta;
-        page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
+        if not kb.disableHtmlDecoding:
+            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
+            if b"&#" in page:
+                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)), page)
+                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
+
+            # e.g. %20%28%29
+            if percentDecode:
+                if b"%" in page:
+                    page = re.sub(b"%([0-9a-fA-F]{2})", lambda _: decodeHex(_.group(1)), page)
+
+            # e.g. &amp;
+            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)
+
+            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
+
+            if (kb.pageEncoding or "").lower() == "utf-8-sig":
+                kb.pageEncoding = "utf-8"
+                if page and page.startswith("\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
+                    page = page[3:]
+
+            page = getUnicode(page, kb.pageEncoding)
+
+            # e.g. &#8217;&#8230;&#8482;
+            if "&#" in page:
+                def _(match):
+                    retVal = match.group(0)
+                    try:
+                        retVal = _unichr(int(match.group(1)))
+                    except (ValueError, OverflowError):
+                        pass
+                    return retVal
+                page = re.sub(r"&#(\d+);", _, page)
+
+            # e.g. &zeta;
+            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
+        else:
+            page = getUnicode(page, kb.pageEncoding)
 
     return page