Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a86f979

Browse files
committed
Minor refactoring together with wider support for HTML entities
1 parent 20a6656 commit a86f979

3 files changed

Lines changed: 282 additions & 1 deletion

File tree

lib/core/decorators.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
1-
# Reference: http://code.activestate.com/recipes/325205-cache-decorator-in-python-24/
1+
#!/usr/bin/env python
2+
3+
"""
4+
Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)
5+
See the file 'doc/COPYING' for copying permission
6+
"""
7+
28
def cachedmethod(f, cache={}):
9+
"""
10+
Method with a cached content
11+
12+
Reference: http://code.activestate.com/recipes/325205-cache-decorator-in-python-24/
13+
"""
314
def _(*args, **kwargs):
415
key = (f, tuple(args), frozenset(kwargs.items()))
516
if key not in cache:

lib/core/htmlentities.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)
5+
See the file 'doc/COPYING' for copying permission
6+
"""
7+
8+
# Reference: http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
9+
10+
# Lookup table from HTML entity name (the text between '&' and ';') to the
# decimal code point of the character it stands for. Entries are kept in
# their original order; the comments only label the code point ranges.
htmlEntities = {
    # Markup-significant ASCII characters
    "quot": 34, "amp": 38, "lt": 60, "gt": 62,

    # Code points 160-191
    "nbsp": 160, "iexcl": 161, "cent": 162, "pound": 163,
    "curren": 164, "yen": 165, "brvbar": 166, "sect": 167,
    "uml": 168, "copy": 169, "ordf": 170, "laquo": 171,
    "not": 172, "shy": 173, "reg": 174, "macr": 175,
    "deg": 176, "plusmn": 177, "sup2": 178, "sup3": 179,
    "acute": 180, "micro": 181, "para": 182, "middot": 183,
    "cedil": 184, "sup1": 185, "ordm": 186, "raquo": 187,
    "frac14": 188, "frac12": 189, "frac34": 190, "iquest": 191,

    # Code points 192-223
    "Agrave": 192, "Aacute": 193, "Acirc": 194, "Atilde": 195,
    "Auml": 196, "Aring": 197, "AElig": 198, "Ccedil": 199,
    "Egrave": 200, "Eacute": 201, "Ecirc": 202, "Euml": 203,
    "Igrave": 204, "Iacute": 205, "Icirc": 206, "Iuml": 207,
    "ETH": 208, "Ntilde": 209, "Ograve": 210, "Oacute": 211,
    "Ocirc": 212, "Otilde": 213, "Ouml": 214, "times": 215,
    "Oslash": 216, "Ugrave": 217, "Uacute": 218, "Ucirc": 219,
    "Uuml": 220, "Yacute": 221, "THORN": 222, "szlig": 223,

    # Code points 224-255
    "agrave": 224, "aacute": 225, "acirc": 226, "atilde": 227,
    "auml": 228, "aring": 229, "aelig": 230, "ccedil": 231,
    "egrave": 232, "eacute": 233, "ecirc": 234, "euml": 235,
    "igrave": 236, "iacute": 237, "icirc": 238, "iuml": 239,
    "eth": 240, "ntilde": 241, "ograve": 242, "oacute": 243,
    "ocirc": 244, "otilde": 245, "ouml": 246, "divide": 247,
    "oslash": 248, "ugrave": 249, "uacute": 250, "ucirc": 251,
    "uuml": 252, "yacute": 253, "thorn": 254, "yuml": 255,

    # Code points 338-732
    "OElig": 338, "oelig": 339, "Scaron": 352, "fnof": 402,
    "scaron": 353, "Yuml": 376, "circ": 710, "tilde": 732,

    # Code points 913-937
    "Alpha": 913, "Beta": 914, "Gamma": 915, "Delta": 916,
    "Epsilon": 917, "Zeta": 918, "Eta": 919, "Theta": 920,
    "Iota": 921, "Kappa": 922, "Lambda": 923, "Mu": 924,
    "Nu": 925, "Xi": 926, "Omicron": 927, "Pi": 928,
    "Rho": 929, "Sigma": 931, "Tau": 932, "Upsilon": 933,
    "Phi": 934, "Chi": 935, "Psi": 936, "Omega": 937,

    # Code points 945-982
    "alpha": 945, "beta": 946, "gamma": 947, "delta": 948,
    "epsilon": 949, "zeta": 950, "eta": 951, "theta": 952,
    "iota": 953, "kappa": 954, "lambda": 955, "mu": 956,
    "nu": 957, "xi": 958, "omicron": 959, "pi": 960,
    "rho": 961, "sigmaf": 962, "sigma": 963, "tau": 964,
    "upsilon": 965, "phi": 966, "chi": 967, "psi": 968,
    "omega": 969, "thetasym": 977, "upsih": 978, "piv": 982,

    # Code points 8194-8364
    "bull": 8226, "hellip": 8230, "prime": 8242, "Prime": 8243,
    "oline": 8254, "frasl": 8260, "ensp": 8194, "emsp": 8195,
    "thinsp": 8201, "zwnj": 8204, "zwj": 8205, "lrm": 8206,
    "rlm": 8207, "ndash": 8211, "mdash": 8212, "lsquo": 8216,
    "rsquo": 8217, "sbquo": 8218, "ldquo": 8220, "rdquo": 8221,
    "bdquo": 8222, "dagger": 8224, "Dagger": 8225, "permil": 8240,
    "lsaquo": 8249, "rsaquo": 8250, "euro": 8364,

    # Code points 8465-8660
    "weierp": 8472, "image": 8465, "real": 8476, "trade": 8482,
    "alefsym": 8501, "larr": 8592, "uarr": 8593, "rarr": 8594,
    "darr": 8595, "harr": 8596, "crarr": 8629, "lArr": 8656,
    "uArr": 8657, "rArr": 8658, "dArr": 8659, "hArr": 8660,

    # Code points 8704-8901
    "forall": 8704, "part": 8706, "exist": 8707, "empty": 8709,
    "nabla": 8711, "isin": 8712, "notin": 8713, "ni": 8715,
    "prod": 8719, "sum": 8721, "minus": 8722, "lowast": 8727,
    "radic": 8730, "prop": 8733, "infin": 8734, "ang": 8736,
    "and": 8743, "or": 8744, "cap": 8745, "cup": 8746,
    "int": 8747, "there4": 8756, "sim": 8764, "cong": 8773,
    "asymp": 8776, "ne": 8800, "equiv": 8801, "le": 8804,
    "ge": 8805, "sub": 8834, "sup": 8835, "nsub": 8836,
    "sube": 8838, "supe": 8839, "oplus": 8853, "otimes": 8855,
    "perp": 8869, "sdot": 8901,

    # Code points 8968-9830
    "lceil": 8968, "rceil": 8969, "lfloor": 8970, "rfloor": 8971,
    "lang": 9001, "rang": 9002, "loz": 9674, "spades": 9824,
    "clubs": 9827, "hearts": 9829, "diams": 9830,
}

lib/request/basic.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from lib.core.data import logger
2424
from lib.core.enums import HTTPHEADER
2525
from lib.core.enums import PLACE
26+
from lib.core.htmlentities import htmlEntities
2627
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
2728
from lib.core.settings import ML
2829
from lib.core.settings import META_CHARSET_REGEX
@@ -215,12 +216,18 @@ def decodePage(page, contentEncoding, contentType):
215216
if "&#" in page:
216217
page = re.sub('&#(\d{1,3});', lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
217218

219+
# e.g. &amp;
220+
page = re.sub('&([^;]+);', lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)
221+
218222
kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
219223
page = getUnicode(page, kb.pageEncoding)
220224

221225
# e.g. &#8217;&#8230;&#8482;
222226
if "&#" in page:
223227
page = re.sub('&#(\d+);', lambda _: unichr(int(_.group(1))), page)
228+
229+
# e.g. &zeta;
230+
page = re.sub('&([^;]+);', lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)
224231

225232
return page
226233

0 commit comments

Comments
 (0)