1515
1616from operator import itemgetter
1717
# --- Harvester configuration (module-level constants) ---
# Post-change state of the diff: these constants were hoisted out of main()
# to module scope; the pre-change in-function duplicates are dropped.

# Socket timeout (seconds) applied globally via socket.setdefaulttimeout().
TIMEOUT = 10

# Persisted state: config file holding the Google result-page index, and the
# output file of harvested "table_name,count" lines.
CONFIG_FILE = 'sqlharvest.cfg'
TABLES_FILE = 'tables.txt'

# Spoofed browser identity for the search requests.
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; AskTB5.3)'

# Google mobile search endpoint used for dorking.
SEARCH_URL = 'http://www.google.com/m?source=mobileproducts&dc=gorganic'

MAX_FILE_SIZE = 2 * 1024 * 1024  # if a result (.sql) file for downloading is more than 2MB in size just skip it

# Search dork: pages containing CREATE TABLE statements in .sql files.
QUERY = 'CREATE TABLE ext:sql'

# Extracts target URLs from Google's redirect links; the non-greedy group
# plus the '&q=' anchor avoids over-capturing trailing query parameters.
REGEX_URLS = r';u=([^"]+?)&q='

# Case-insensitive (inline (?i) flag) match of the table name in a
# CREATE TABLE statement, tolerating an optional /* comment */ and an
# optional IF NOT EXISTS clause before the name.
REGEX_RESULT = r'(?i)CREATE TABLE\s*(/\*.*\*/)?\s*(IF NOT EXISTS)?\s*(?P<result>[^\(;]+)'
2927
28+ def main ():
3029 tables = dict ()
31- refiles = re .compile (REGEX_URLS )
32- retables = re .compile (REGEX_RESULT , re .I )
33-
3430 cookies = cookielib .CookieJar ()
3531 cookie_processor = urllib2 .HTTPCookieProcessor (cookies )
3632 opener = urllib2 .build_opener (cookie_processor )
37- opener .addheaders = [(' User-Agent' , USER_AGENT )]
33+ opener .addheaders = [(" User-Agent" , USER_AGENT )]
3834
3935 conn = opener .open (SEARCH_URL )
4036 page = conn .read () #set initial cookie values
4137
4238 config = ConfigParser .ConfigParser ()
4339 config .read (CONFIG_FILE )
44- if not config .has_section ('options' ):
45- config .add_section ('options' )
4640
47- if not config .has_option ('options' , 'index' ):
48- config .set ('options' , 'index' , '0' )
41+ if not config .has_section ("options" ):
42+ config .add_section ("options" )
43+ if not config .has_option ("options" , "index" ):
44+ config .set ("options" , "index" , "0" )
4945
50- i = int (config .get (' options' , ' index' ))
46+ i = int (config .get (" options" , " index" ))
5147
5248 try :
53- f = open (TABLES_FILE , 'r' )
54- for line in f .xreadlines ():
55- if len (line ) > 0 and ',' in line :
56- temp = line .split (',' )
57- tables [temp [0 ]] = int (temp [1 ])
58- f .close ()
49+ with open (TABLES_FILE , 'r' ) as f :
50+ for line in f .xreadlines ():
51+ if len (line ) > 0 and ',' in line :
52+ temp = line .split (',' )
53+ tables [temp [0 ]] = int (temp [1 ])
5954 except :
6055 pass
6156
6257 socket .setdefaulttimeout (TIMEOUT )
6358
64- files , oldFiles = None , None
59+ files , old_files = None , None
6560 try :
6661 while True :
6762 abort = False
68- oldFiles = files
63+ old_files = files
6964 files = []
7065
7166 try :
72- conn = opener .open (' %s&q=%s&start=%d&sa=N' % (SEARCH_URL , QUERY .replace (' ' , '+' ), i * 10 ))
67+ conn = opener .open (" %s&q=%s&start=%d&sa=N" % (SEARCH_URL , QUERY .replace (' ' , '+' ), i * 10 ))
7368 page = conn .read ()
74- for match in refiles .finditer (page ):
69+ for match in re .finditer (REGEX_URLS , page ):
7570 files .append (urllib .unquote (match .group (1 )))
76- if len (files ) >= 10 : break
77- abort = (files == oldFiles )
71+ if len (files ) >= 10 :
72+ break
73+ abort = (files == old_files )
7874
7975 except KeyboardInterrupt :
8076 raise
@@ -91,23 +87,24 @@ def main():
9187
9288 for sqlfile in files :
9389 print sqlfile
90+
9491 try :
9592 req = urllib2 .Request (sqlfile )
9693 response = urllib2 .urlopen (req )
9794
98- if response .headers .has_key (' Content-Length' ):
99- if int (response .headers .get (' Content-Length' )) > MAX_FILE_SIZE :
95+ if response .headers .has_key (" Content-Length" ):
96+ if int (response .headers .get (" Content-Length" )) > MAX_FILE_SIZE :
10097 continue
10198
10299 page = response .read ()
103100 found = False
104101 counter = 0
105102
106- for match in retables .finditer (page ):
103+ for match in re .finditer (REGEX_RESULT , page ):
107104 counter += 1
108- table = match .group ("result" ).strip ().strip ("`" ). strip ( " \" " ). strip ( " '" ).replace ('"."' , "." ).replace ("].[" , "." ).strip ('[' ). strip ( ' ]' )
105+ table = match .group ("result" ).strip ().strip ("`\" '" ).replace ('"."' , "." ).replace ("].[" , "." ).strip ('[]' )
109106
110- if table and '>' not in table and '<' not in table and '--' not in table and ' ' not in table :
107+ if table and not any ( _ in table for _ in ( '>' , '<' , '--' , ' ' )) :
111108 found = True
112109 sys .stdout .write ('*' )
113110
@@ -131,19 +128,14 @@ def main():
131128 pass
132129
133130 finally :
134- f = open (TABLES_FILE , 'w+' )
135-
136- tables = sorted (tables .items (), key = itemgetter (1 ), reverse = True )
137-
138- for table , count in tables :
139- f .write ("%s,%d\n " % (table , count ))
140-
141- f .close ()
142- config .set ('options' , 'index' , str (i + 1 ))
143-
144- f = open (CONFIG_FILE , 'w+' )
145- config .write (f )
146- f .close ()
131+ with open (TABLES_FILE , 'w+' ) as f :
132+ tables = sorted (tables .items (), key = itemgetter (1 ), reverse = True )
133+ for table , count in tables :
134+ f .write ("%s,%d\n " % (table , count ))
135+
136+ config .set ("options" , "index" , str (i + 1 ))
137+ with open (CONFIG_FILE , 'w+' ) as f :
138+ config .write (f )
147139
# Script entry point: run the harvester only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
0 commit comments