File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1154,14 +1154,14 @@ def __setHTTPUserAgent():
11541154 conf .httpHeaders .append (("User-Agent" , conf .agent ))
11551155
11561156 elif not conf .randomAgent :
1157- addDefaultUserAgent = True
1157+ _ = True
11581158
11591159 for header , _ in conf .httpHeaders :
11601160 if header == "User-Agent" :
1161- addDefaultUserAgent = False
1161+ _ = False
11621162 break
11631163
1164- if addDefaultUserAgent :
1164+ if _ :
11651165 conf .httpHeaders .append (("User-Agent" , __defaultHTTPUserAgent ()))
11661166
11671167 else :
Original file line number Diff line number Diff line change 6363PRINTABLE_CHAR_REGEX = r"[^\x00-\x1f\x7e-\xff]"
6464
6565# regular expression used for extracting results from google search
66- GOOGLE_REGEX = r"url\?q=(http[^>]+)&sa=U&"
66+ GOOGLE_REGEX = r"url\?\w+=(http[^>]+)&(sa=U|rct=j)"
6767
6868# regular expression used for extracting content from "textual" tags
6969TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h\d|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)"
Original file line number Diff line number Diff line change @@ -46,7 +46,7 @@ def _parsePage(self, page):
4646 HTTP addresses
4747 """
4848
49- retVal = re .findall (GOOGLE_REGEX , page , re .I | re .S )
49+ retVal = [ match . group ( 1 ) for match in re .finditer (GOOGLE_REGEX , page , re .I | re .S )]
5050
5151 return retVal
5252
You can’t perform that action at this time.
0 commit comments