Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 543454e

Browse files
committed
Python: Model file access from XML parsing
1 parent 386ff53 commit 543454e

6 files changed

Lines changed: 71 additions & 15 deletions

File tree

python/ql/lib/semmle/python/frameworks/Lxml.qll

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,13 +275,38 @@ private module Lxml {
275275
}
276276
}
277277

278+
/**
279+
* A call to `lxml.etree.parse` or `lxml.etree.parseid`, which
280+
* takes either a filename or a file-like object as argument. To capture the filename
281+
* for path-injection, we have this subclass.
282+
*
283+
* See
284+
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
285+
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
286+
*/
287+
private class FileAccessFromLXMLParsing extends LXMLParsing, FileSystemAccess::Range {
288+
FileAccessFromLXMLParsing() {
289+
this = API::moduleImport("lxml").getMember("etree").getMember(["parse", "parseid"]).getACall()
290+
// I considered whether we should try to reduce FPs from people passing file-like
291+
// objects, which will not be a file system access (and couldn't cause a
292+
// path-injection).
293+
//
294+
// I suppose that once we have proper flow-summary support for file-like objects,
295+
// we can make the XXE/XML-bomb sinks allow an access-path, while the
296+
// path-injection sink wouldn't, and then we will not end up with such FPs.
297+
}
298+
299+
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
300+
}
301+
278302
/**
279303
* A call to `lxml.etree.iterparse`
280304
*
281305
* See
282306
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse
283307
*/
284-
private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
308+
private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range,
309+
FileSystemAccess::Range {
285310
LXMLIterparseCall() {
286311
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
287312
}
@@ -303,5 +328,7 @@ private module Lxml {
303328
override predicate mayExecuteInput() { none() }
304329

305330
override DataFlow::Node getOutput() { result = this }
331+
332+
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
306333
}
307334
}

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3306,6 +3306,35 @@ private module StdlibPrivate {
33063306
result = this
33073307
}
33083308
}
3309+
3310+
/**
3311+
* A call to `xml.etree.ElementTree.parse` or `xml.etree.ElementTree.iterparse`, which
3312+
* takes either a filename or a file-like object as argument. To capture the filename
3313+
* for path-injection, we have this subclass.
3314+
*
3315+
* See
3316+
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
3317+
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
3318+
*/
3319+
private class FileAccessFromXMLEtreeParsing extends XMLEtreeParsing, FileSystemAccess::Range {
3320+
FileAccessFromXMLEtreeParsing() {
3321+
this =
3322+
API::moduleImport("xml")
3323+
.getMember("etree")
3324+
.getMember("ElementTree")
3325+
.getMember(["parse", "iterparse"])
3326+
.getACall()
3327+
// I considered whether we should try to reduce FPs from people passing file-like
3328+
// objects, which will not be a file system access (and couldn't cause a
3329+
// path-injection).
3330+
//
3331+
// I suppose that once we have proper flow-summary support for file-like objects,
3332+
// we can make the XXE/XML-bomb sinks allow an access-path, while the
3333+
// path-injection sink wouldn't, and then we will not end up with such FPs.
3334+
}
3335+
3336+
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
3337+
}
33093338
}
33103339

33113340
// ---------------------------------------------------------------------------

python/ql/test/library-tests/frameworks/lxml/parsing.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..)
1818

1919
xml_file = 'xml_file'
20-
lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
21-
lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
20+
lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file
21+
lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file
2222

23-
lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
24-
lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
23+
lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file
24+
lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file
2525

26-
lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
27-
lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
26+
lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
27+
lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
2828

2929
# With default parsers (nothing changed)
3030
parser = lxml.etree.XMLParser()
@@ -63,5 +63,5 @@
6363
# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of
6464
# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O
6565

66-
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
67-
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
66+
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
67+
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file

python/ql/test/library-tests/frameworks/lxml/xpath.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from io import StringIO
33

44
def test_parse():
5-
tree = etree.parse(StringIO('<foo><bar></bar></foo>')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE'
5+
tree = etree.parse(StringIO('<foo><bar></bar></foo>')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' getAPathArgument=StringIO(..)
66
r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar'
77

88
def test_XPath_class():

python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
ns = {'dc': 'http://purl.org/dc/elements/1.1/'}
33

44
import xml.etree.ElementTree as ET
5-
tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
5+
tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument='country_data.xml'
66
root = tree.getroot()
77

88
root.find(match, namespaces=ns) # $ getXPath=match

python/ql/test/library-tests/frameworks/stdlib/xml_etree.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..)
1717
xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..)
1818

19-
xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..)
20-
xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..)
19+
xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..)
20+
xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..)
2121

22-
xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..)
23-
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..)
22+
xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..)
23+
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..)
2424

2525

2626
# With parsers (no options available to disable/enable security features)

0 commit comments

Comments
 (0)