135135from lib .core .update import update
136136from lib .parse .configfile import configFileParser
137137from lib .parse .payloads import loadPayloads
138+ from lib .parse .sitemap import parseSitemap
138139from lib .request .basic import checkCharEncoding
139140from lib .request .connect import Connect as Request
140141from lib .request .dns import DNSServer
@@ -504,10 +505,13 @@ def _setCrawler():
504505 if not conf .crawlDepth :
505506 return
506507
507- if not conf .bulkFile :
508+ if not any (( conf .bulkFile , conf . sitemapUrl )) :
508509 crawl (conf .url )
509510 else :
510- targets = getFileItems (conf .bulkFile )
511+ if conf .bulkFile :
512+ targets = getFileItems (conf .bulkFile )
513+ else :
514+ targets = parseSitemap (conf .sitemapUrl )
511515 for i in xrange (len (targets )):
512516 try :
513517 target = targets [i ]
@@ -618,10 +622,33 @@ def _setBulkMultipleTargets():
618622 errMsg += "does not exist"
619623 raise SqlmapFilePathException (errMsg )
620624
625+ found = False
621626 for line in getFileItems (conf .bulkFile ):
622627 if re .match (r"[^ ]+\?(.+)" , line , re .I ) or CUSTOM_INJECTION_MARK_CHAR in line :
628+ found = True
623629 kb .targets .add ((line .strip (), None , None , None ))
624630
631+ if not found and not conf .forms and not conf .crawlDepth :
632+ warnMsg = "no usable links found (with GET parameters)"
633+ logger .warn (warnMsg )
634+
def _setSitemapTargets():
    """
    Register links taken from the sitemap at conf.sitemapUrl as scan targets.

    Does nothing when conf.sitemapUrl is not set. Each item returned by
    parseSitemap() that contains a '?' followed by at least one character
    (i.e. a query string) is stripped and added to kb.targets. A warning is
    logged when no such item exists and neither conf.forms nor
    conf.crawlDepth is set.
    """

    sitemapUrl = conf.sitemapUrl

    if sitemapUrl:
        logger.info("parsing sitemap '%s'" % sitemapUrl)

        usable = 0
        for link in parseSitemap(sitemapUrl):
            # Only links carrying a query string are kept as targets
            if re.match(r"[^ ]+\?(.+)", link, re.I) is None:
                continue

            usable += 1
            kb.targets.add((link.strip(), None, None, None))

        # Stay quiet when form searching or crawling is also requested
        if not (usable or conf.forms or conf.crawlDepth):
            logger.warn("no usable links found (with GET parameters)")
651+
625652def _findPageForms ():
626653 if not conf .forms or conf .crawlDepth :
627654 return
@@ -632,11 +659,14 @@ def _findPageForms():
632659 infoMsg = "searching for forms"
633660 logger .info (infoMsg )
634661
635- if not conf .bulkFile :
662+ if not any (( conf .bulkFile , conf . sitemapUrl )) :
636663 page , _ = Request .queryPage (content = True )
637664 findPageForms (page , conf .url , True , True )
638665 else :
639- targets = getFileItems (conf .bulkFile )
666+ if conf .bulkFile :
667+ targets = getFileItems (conf .bulkFile )
668+ else :
669+ targets = parseSitemap (conf .sitemapUrl )
640670 for i in xrange (len (targets )):
641671 try :
642672 target = targets [i ]
@@ -1449,13 +1479,16 @@ def _cleanupOptions():
14491479 if conf .dFile :
14501480 conf .dFile = ntToPosixSlashes (normalizePath (conf .dFile ))
14511481
1482+ if conf .sitemapUrl and not conf .sitemapUrl .lower ().startswith ("http" ):
1483+ conf .sitemapUrl = "http%s://%s" % ('s' if conf .forceSSL else '' , conf .sitemapUrl )
1484+
14521485 if conf .msfPath :
14531486 conf .msfPath = ntToPosixSlashes (normalizePath (conf .msfPath ))
14541487
14551488 if conf .tmpPath :
14561489 conf .tmpPath = ntToPosixSlashes (normalizePath (conf .tmpPath ))
14571490
1458- if conf .googleDork or conf .logFile or conf .bulkFile or conf .forms or conf .crawlDepth :
1491+ if any (( conf .googleDork , conf .logFile , conf .bulkFile , conf . sitemapUrl , conf .forms , conf .crawlDepth )) :
14591492 conf .multipleTargets = True
14601493
14611494 if conf .optimize :
@@ -1631,6 +1664,7 @@ def _setKnowledgeBaseAttributes(flushAll=True):
16311664 kb .extendTests = None
16321665 kb .errorIsNone = True
16331666 kb .fileReadMode = False
1667+ kb .followSitemapRecursion = None
16341668 kb .forcedDbms = None
16351669 kb .forcePartialUnion = False
16361670 kb .headersFp = {}
@@ -2130,8 +2164,8 @@ def _basicOptionValidation():
21302164 errMsg = "maximum number of used threads is %d avoiding potential connection issues" % MAX_NUMBER_OF_THREADS
21312165 raise SqlmapSyntaxException (errMsg )
21322166
2133- if conf .forms and not any ((conf .url , conf .bulkFile )):
2134- errMsg = "switch '--forms' requires usage of option '-u' ('--url') or '-m '"
2167+ if conf .forms and not any ((conf .url , conf .bulkFile , conf . sitemapUrl )):
2168+ errMsg = "switch '--forms' requires usage of option '-u' ('--url'), '-m' or '-x '"
21352169 raise SqlmapSyntaxException (errMsg )
21362170
21372171 if conf .requestFile and conf .url and conf .url != DUMMY_URL :
@@ -2266,7 +2300,7 @@ def init():
22662300 parseTargetUrl ()
22672301 parseTargetDirect ()
22682302
2269- if any ((conf .url , conf .logFile , conf .bulkFile , conf .requestFile , conf .googleDork , conf .liveTest )):
2303+ if any ((conf .url , conf .logFile , conf .bulkFile , conf .sitemapUrl , conf . requestFile , conf .googleDork , conf .liveTest )):
22702304 _setHTTPTimeout ()
22712305 _setHTTPExtraHeaders ()
22722306 _setHTTPCookies ()
@@ -2279,6 +2313,7 @@ def init():
22792313 _setSafeUrl ()
22802314 _setGoogleDorking ()
22812315 _setBulkMultipleTargets ()
2316+ _setSitemapTargets ()
22822317 _urllib2Opener ()
22832318 _checkTor ()
22842319 _setCrawler ()
0 commit comments