1616from lib .core .common import getUnicode
1717from lib .core .common import readInput
1818from lib .core .convert import htmlunescape
19+ from lib .core .convert import urldecode
1920from lib .core .convert import urlencode
2021from lib .core .data import conf
2122from lib .core .data import kb
2223from lib .core .data import logger
2324from lib .core .exception import sqlmapConnectionException
2425from lib .core .exception import sqlmapGenericException
26+ from lib .core .settings import GOOGLE_REGEX
2527from lib .core .settings import UNICODE_ENCODING
2628from lib .core .settings import URI_INJECTABLE_REGEX
2729from lib .request .basic import decodePage
@@ -33,43 +35,41 @@ class Google:
3335 """
3436
3537 def __init__ (self , handlers ):
36- self .__matches = []
37- self .__cj = cookielib .LWPCookieJar ()
38+ self ._matches = []
39+ self ._cj = cookielib .LWPCookieJar ()
3840
39- handlers .append (urllib2 .HTTPCookieProcessor (self .__cj ))
41+ handlers .append (urllib2 .HTTPCookieProcessor (self ._cj ))
4042
4143 self .opener = urllib2 .build_opener (* handlers )
4244 self .opener .addheaders = conf .httpHeaders
4345
44- def __parsePage (self , page ):
46+ def _parsePage (self , page ):
4547 """
4648 Parse Google dork search results page to get the list of
4749 HTTP addresses
4850 """
4951
50- matches = []
52+ retVal = re . findall ( GOOGLE_REGEX , page , re . I | re . S )
5153
52- regExpr = r'h3 class="?r"?><a href="(http[s]?://[^"]+?)"\s(class="?l"?|onmousedown=)'
53- matches = re .findall (regExpr , page , re .I | re .S )
54-
55- return [match [0 ] for match in matches ]
54+ return retVal
5655
5756 def getTargetUrls (self ):
5857 """
5958 This method returns the list of hosts with parameters out of
6059 your Google dork search results
6160 """
6261
63- for match in self .__matches :
64- if re .search (r"(.*?)\?(.+)" , match ):
65- kb .targetUrls .add (( htmlunescape (htmlunescape (match )), None , None , None ))
66- elif re .search (URI_INJECTABLE_REGEX , match , re .I ):
62+ for _ in self ._matches :
63+ _ = urldecode (_ )
64+ if re .search (r"(.*?)\?(.+)" , _ ):
65+ kb .targetUrls .add ((_ , None , None , None ))
66+ elif re .search (URI_INJECTABLE_REGEX , _ , re .I ):
6767 if kb .scanOnlyGoogleGETs is None :
6868 message = "do you want to scan only results containing GET parameters? [Y/n] "
6969 test = readInput (message , default = "Y" )
7070 kb .scanOnlyGoogleGETs = test .lower () != 'n'
7171 if not kb .scanOnlyGoogleGETs :
72- kb .targetUrls .add (( htmlunescape ( htmlunescape ( "%s" % match )) , None , None , None ))
72+ kb .targetUrls .add ((_ , None , None , None ))
7373
7474 def getCookie (self ):
7575 """
@@ -138,11 +138,11 @@ def search(self, googleDork):
138138 errMsg = "unable to connect to Google"
139139 raise sqlmapConnectionException , errMsg
140140
141- self .__matches = self .__parsePage (page )
141+ self ._matches = self ._parsePage (page )
142142
143- if not self .__matches and "detected unusual traffic" in page :
143+ if not self ._matches and "detected unusual traffic" in page :
144144 warnMsg = "Google has detected 'unusual' traffic from "
145145 warnMsg += "this computer disabling further searches"
146146 raise sqlmapGenericException , warnMsg
147147
148- return self .__matches
148+ return self ._matches
0 commit comments