@@ -2012,14 +2012,17 @@ def getPageWordSet(page):
20122012 retVal = set ()
20132013
20142014 # only if the page's charset has been successfully identified
2015- if isinstance (page , six .text_type ):
2015+ if isinstance (page , six .string_types ):
20162016 retVal = set (_ .group (0 ) for _ in re .finditer (r"\w+" , getFilteredPageContent (page )))
20172017
20182018 return retVal
20192019
2020- def showStaticWords (firstPage , secondPage ):
2020+ def showStaticWords (firstPage , secondPage , minLength = 3 ):
20212021 """
20222022 Prints words appearing in two different response pages
2023+
2024+ >>> showStaticWords("this is a test", "this is another test")
2025+ ['this']
20232026 """
20242027
20252028 infoMsg = "finding static words in longest matching part of dynamic page content"
@@ -2038,19 +2041,20 @@ def showStaticWords(firstPage, secondPage):
20382041 commonWords = None
20392042
20402043 if commonWords :
2041- commonWords = list ( commonWords )
2042- commonWords .sort (lambda a , b : cmp (a .lower (), b .lower ()))
2044+ commonWords = [ _ for _ in commonWords if len ( _ ) >= minLength ]
2045+ commonWords .sort (key = functools . cmp_to_key ( lambda a , b : cmp (a .lower (), b .lower () )))
20432046
20442047 for word in commonWords :
2045- if len (word ) > 2 :
2046- infoMsg += "'%s', " % word
2048+ infoMsg += "'%s', " % word
20472049
20482050 infoMsg = infoMsg .rstrip (", " )
20492051 else :
20502052 infoMsg += "None"
20512053
20522054 logger .info (infoMsg )
20532055
2056+ return commonWords
2057+
20542058def isWindowsDriveLetterPath (filepath ):
20552059 """
20562060 Returns True if given filepath starts with a Windows drive letter
0 commit comments