Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 386ff53

Browse files
committed
Python: Model lxml.iterparse
1 parent 12cbdcd commit 386ff53

2 files changed

Lines changed: 44 additions & 4 deletions

File tree

python/ql/lib/semmle/python/frameworks/Lxml.qll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,4 +274,34 @@ private module Lxml {
274274
result = this
275275
}
276276
}
277+
278+
/**
279+
* A call to `lxml.etree.iterparse`.
280+
*
281+
* See
282+
* - https://lxml.de/apidoc/lxml.etree.html#lxml.etree.iterparse
283+
*/
284+
private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
285+
LXMLIterparseCall() {
286+
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
287+
}
288+
289+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
290+
291+
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
292+
// note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O
293+
kind.isXxe()
294+
or
295+
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
296+
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t)
297+
or
298+
kind.isDtdRetrieval() and
299+
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
300+
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
301+
}
302+
303+
override predicate mayExecuteInput() { none() }
304+
305+
override DataFlow::Node getOutput() { result = this }
306+
}
277307
}

python/ql/test/library-tests/frameworks/lxml/parsing.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@
1616
lxml.etree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..)
1717
lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..)
1818

19-
lxml.etree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
20-
lxml.etree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
19+
xml_file = 'xml_file'
20+
lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
21+
lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
2122

22-
lxml.etree.parseid(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
23-
lxml.etree.parseid(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
23+
lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
24+
lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
25+
26+
lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
27+
lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
2428

2529
# With default parsers (nothing changed)
2630
parser = lxml.etree.XMLParser()
@@ -55,3 +59,9 @@
5559
# DTD retrieval vuln (also XXE)
5660
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
5761
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
62+
63+
# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of
64+
# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O
65+
66+
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
67+
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)

0 commit comments

Comments
 (0)