Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8ec4bc9

Browse files
committed
revert of the last commit. have to think about it
1 parent 9c093d9 commit 8ec4bc9

1 file changed

Lines changed: 2 additions & 4 deletions

File tree

lib/core/common.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1399,8 +1399,7 @@ def sanitizeAsciiString(subject):
13991399
def getFilteredPageContent(page, onlyText=True):
14001400
retVal = page
14011401

1402-
# only if the page's charset had been successfully identified
1403-
if isinstance(page, unicode):
1402+
if isinstance(page, basestring):
14041403
retVal = re.sub(r"(?s)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), " ", page)
14051404

14061405
while retVal.find(" ") != -1:
@@ -1413,8 +1412,7 @@ def getFilteredPageContent(page, onlyText=True):
14131412
def getPageTextWordsSet(page):
14141413
retVal = None
14151414

1416-
# only if the page's charset had been successfully identified
1417-
if isinstance(page, unicode):
1415+
if isinstance(page, basestring):
14181416
page = getFilteredPageContent(page)
14191417
retVal = set(re.findall(r"\w+", page))
14201418

0 commit comments

Comments
 (0)