@@ -713,7 +713,14 @@ def getDirs():
713713
714714 return list (directories )
715715
716- def filePathToString (filePath ):
716+ def filePathToSafeString (filePath ):
717+ """
718+ Returns string representation of a given filepath safe for a single filename usage
719+
720+ >>> filePathToSafeString('C:/Windows/system32')
721+ 'C__Windows_system32'
722+ """
723+
717724 retVal = filePath .replace ("/" , "_" ).replace ("\\ " , "_" )
718725 retVal = retVal .replace (" " , "_" ).replace (":" , "_" )
719726
@@ -885,20 +892,32 @@ def readInput(message, default=None, checkBatch=True):
def randomRange(start=0, stop=1000):
    """
    Picks a pseudo-random integer between start and stop (both bounds included)

    >>> random.seed(0)
    >>> randomRange(1, 500)
    423
    """

    # random.randint() treats both ends of the interval as inclusive
    picked = random.randint(start, stop)

    return int(picked)
891902
def randomInt(length=4):
    """
    Returns random integer value with provided number of digits

    The leading digit is drawn from 1-9 so that the result always has
    exactly `length` digits, while the remaining ones are drawn from 0-9.
    Raises ValueError if length is less than 1 (no digits to convert).

    >>> random.seed(0)
    >>> randomInt(6)
    874254
    """

    # Candidate pool for the leading position (computed once, not per digit)
    nonZeroDigits = string.digits.replace('0', '')

    # range() is used in place of the Python 2 only xrange(); digits are
    # still generated left to right, hence seeded output stays the same
    digits = [random.choice(string.digits if index else nonZeroDigits) for index in range(length)]

    return int("".join(digits))
898913
899914def randomStr (length = 4 , lowercase = False , alphabet = None ):
900915 """
901916 Returns random string value with provided number of characters
917+
918+ >>> random.seed(0)
919+ >>> randomStr(6)
920+ 'RNvnAv'
902921 """
903922
904923 if alphabet :
@@ -913,6 +932,9 @@ def randomStr(length=4, lowercase=False, alphabet=None):
def sanitizeStr(value):
    """
    Returns the getUnicode() form of a given value with every newline
    replaced by a space and every carriage-return removed

    >>> sanitizeStr('foo\\n\\rbar')
    u'foo bar'
    """

    text = getUnicode(value)

    # newline becomes a single space, carriage-return is dropped entirely
    text = text.replace("\n", " ")
    text = text.replace("\r", "")

    return text
@@ -1214,6 +1236,9 @@ def expandAsteriskForColumns(expression):
12141236def getLimitRange (count , dump = False , plusOne = False ):
12151237 """
12161238 Returns range of values used in limit/offset constructs
1239+
1240+ >>> [_ for _ in getLimitRange(10)]
1241+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
12171242 """
12181243
12191244 retVal = None
@@ -1321,6 +1346,14 @@ def getFileType(filePath):
13211346 return "text" if "ASCII" in _ or "text" in _ else "binary"
13221347
13231348def getCharset (charsetType = None ):
1349+ """
1350+ Returns list with integers representing characters of a given
1351+ charset type appropriate for inference techniques
1352+
1353+ >>> getCharset(CHARSET_TYPE.BINARY)
1354+ [0, 1, 47, 48, 49]
1355+ """
1356+
13241357 asciiTbl = []
13251358
13261359 if charsetType is None :
@@ -1363,6 +1396,9 @@ def getCharset(charsetType=None):
13631396def directoryPath (filepath ):
13641397 """
13651398 Returns directory path for a given filepath
1399+
1400+ >>> directoryPath('/var/log/apache.log')
1401+ '/var/log'
13661402 """
13671403
13681404 retVal = filepath
@@ -1375,6 +1411,9 @@ def directoryPath(filepath):
13751411def normalizePath (filepath ):
13761412 """
13771413 Returns normalized string representation of a given filepath
1414+
1415+ >>> normalizePath('//var///log/apache.log')
1416+ '//var/log/apache.log'
13781417 """
13791418
13801419 retVal = filepath
@@ -1388,6 +1427,9 @@ def normalizePath(filepath):
13881427def safeStringFormat (format_ , params ):
13891428 """
13901429 Avoids problems with inappropriate string format strings
1430+
1431+ >>> safeStringFormat('foobar%d%s', ('1', 2))
1432+ u'foobar12'
13911433 """
13921434
13931435 retVal = format_ .replace ("%d" , "%s" )
@@ -1413,6 +1455,9 @@ def getFilteredPageContent(page, onlyText=True):
14131455 """
14141456 Returns filtered page content without script, style and/or comments
14151457 or all HTML tags
1458+
1459+ >>> getFilteredPageContent(u'<html><title>foobar</title><body>test</body></html>')
1460+ u'foobar test'
14161461 """
14171462
14181463 retVal = page
@@ -1422,13 +1467,16 @@ def getFilteredPageContent(page, onlyText=True):
14221467 retVal = re .sub (r"(?si)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else "" ), " " , page )
14231468 while retVal .find (" " ) != - 1 :
14241469 retVal = retVal .replace (" " , " " )
1425- retVal = htmlunescape (retVal )
1470+ retVal = htmlunescape (retVal . strip () )
14261471
14271472 return retVal
14281473
14291474def getPageWordSet (page ):
14301475 """
14311476 Returns word set used in page content
1477+
1478+ >>> sorted(getPageWordSet(u'<html><title>foobar</title><body>test</body></html>'))
1479+ [u'foobar', u'test']
14321480 """
14331481
14341482 retVal = set ()
@@ -1473,6 +1521,11 @@ def showStaticWords(firstPage, secondPage):
def isWindowsDriveLetterPath(filepath):
    """
    Returns True if given filepath starts with a Windows drive letter

    Only the first two characters are inspected: a single word character
    immediately followed by a colon.

    >>> isWindowsDriveLetterPath('C:\\boot.ini')
    True
    >>> isWindowsDriveLetterPath('/var/log/apache.log')
    False
    """

    # Raw string literal: the pattern is unchanged, but no longer depends on
    # Python passing unrecognized escape sequences through untouched
    # NOTE: the word-character class also accepts digits and underscore; kept
    # that way so that existing callers see no change in results
    return re.search(r"\A[\w]\:", filepath) is not None
@@ -1634,6 +1687,9 @@ def stdev(values):
16341687 """
16351688 Computes standard deviation of a list of numbers.
16361689 Reference: http://www.goldb.org/corestats.html
1690+
1691+ >>> stdev([0.9, 0.9, 0.9, 1.0, 0.8, 0.9])
1692+ 0.06324555320336757
16371693 """
16381694
16391695 if not values or len (values ) < 2 :
@@ -1654,6 +1710,9 @@ def stdev(values):
def average(values):
    """
    Returns the arithmetic mean (sum divided by count) of given numbers,
    or None when there are no values to work with

    >>> average([0.9, 0.9, 0.9, 1.0, 0.8, 0.9])
    0.9
    """

    # Nothing to average (None or an empty sequence)
    if not values:
        return None

    total = sum(values)
    count = len(values)

    return total / count
@@ -1872,6 +1931,9 @@ def longestCommonPrefix(*sequences):
18721931 """
18731932 Returns longest common prefix occurring in given sequences
18741933 Reference: http://boredzo.org/blog/archives/2007-01-06/longest-common-prefix-in-python-2
1934+
1935+ >>> longestCommonPrefix('foobar', 'fobar')
1936+ 'fo'
18751937 """
18761938
18771939 if len (sequences ) == 1 :
@@ -1904,6 +1966,10 @@ def pushValue(value):
def popValue():
    """
    Removes and returns the top value of the value stack kept in the
    current thread's data (as provided by getCurrentThreadData())

    >>> pushValue('foobar')
    >>> popValue()
    'foobar'
    """

    threadData = getCurrentThreadData()

    return threadData.valueStack.pop()
@@ -2028,6 +2094,13 @@ def findMultipartPostBoundary(post):
20282094 return retVal
20292095
20302096def urldecode (value , encoding = None , unsafe = "%%&=;+%s" % CUSTOM_INJECTION_MARK_CHAR , convall = False , plusspace = True ):
2097+ """
2098+ URL decodes given value
2099+
2100+ >>> urldecode('AND%201%3E%282%2B3%29%23', convall=True)
2101+ u'AND 1>(2+3)#'
2102+ """
2103+
20312104 result = value
20322105
20332106 if value :
@@ -2044,10 +2117,10 @@ def _(match):
20442117 charset = reduce (lambda x , y : x .replace (y , "" ), unsafe , string .printable )
20452118 char = chr (ord (match .group (1 ).decode ("hex" )))
20462119 return char if char in charset else match .group (0 )
2047- result = re .sub ("%([0-9a-fA-F]{2})" , _ , value )
2048-
2120+ result = value
20492121 if plusspace :
20502122 result = result .replace ("+" , " " ) # plus sign has a special meaning in url encoded data (hence the usage of urllib.unquote_plus in convall case)
2123+ result = re .sub ("%([0-9a-fA-F]{2})" , _ , result )
20512124
20522125 if isinstance (result , str ):
20532126 result = unicode (result , encoding or UNICODE_ENCODING , "replace" )
0 commit comments