Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 80b5cde

Browse files
committed
Python: Promote lxml parsing modeling
1 parent 3040adf commit 80b5cde

4 files changed

Lines changed: 167 additions & 163 deletions

File tree

python/ql/lib/semmle/python/frameworks/Lxml.qll

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ private import semmle.python.ApiGraphs
1919
* - https://lxml.de/tutorial.html
2020
*/
2121
private module Lxml {
22+
// ---------------------------------------------------------------------------
23+
// XPath
24+
// ---------------------------------------------------------------------------
2225
/**
2326
* A class constructor compiling an XPath expression.
2427
*
@@ -97,4 +100,164 @@ private module Lxml {
97100

98101
override string getName() { result = "lxml.etree" }
99102
}
103+
104+
// ---------------------------------------------------------------------------
105+
// Parsing
106+
// ---------------------------------------------------------------------------
107+
/**
108+
* Provides models for `lxml.etree` parsers.
109+
*
110+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
111+
*/
112+
module XMLParser {
113+
/**
114+
* A source of instances of `lxml.etree` parsers. Extend this class to model new instances.
115+
*
116+
* This can include instantiations of the class, return values from function
117+
* calls, or a special parameter that will be set when functions are called by an external
118+
* library.
119+
*
120+
* Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
121+
*/
122+
abstract class InstanceSource extends DataFlow::LocalSourceNode {
123+
/** Holds if this instance is vulnerable to `kind`. */
124+
abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind);
125+
}
126+
127+
/**
128+
* A call to `lxml.etree.XMLParser`.
129+
*
130+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
131+
*/
132+
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
133+
LXMLParser() {
134+
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
135+
}
136+
137+
// NOTE: it's not possible to change settings of a parser after constructing it
138+
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
139+
kind.isXxe() and
140+
(
141+
// resolve_entities has default True
142+
not exists(this.getArgByName("resolve_entities"))
143+
or
144+
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
145+
)
146+
or
147+
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
148+
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
149+
not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t)
150+
or
151+
kind.isDtdRetrieval() and
152+
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
153+
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
154+
}
155+
}
156+
157+
/**
158+
* A call to `lxml.etree.get_default_parser`.
159+
*
160+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
161+
*/
162+
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
163+
LXMLDefaultParser() {
164+
this =
165+
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
166+
}
167+
168+
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
169+
// as highlighted by
170+
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
171+
// by default XXE is allowed, so as long as the default parser has not been
172+
// overridden, the result is also vulnerable to XXE.
173+
kind.isXxe()
174+
// TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
175+
}
176+
}
177+
178+
/** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
179+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
180+
t.start() and
181+
result = origin
182+
or
183+
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
184+
}
185+
186+
/** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
187+
DataFlow::Node instance(InstanceSource origin) {
188+
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
189+
}
190+
191+
/** Gets a reference to an `lxml.etree` parser instance that is vulnerable to `kind`. */
192+
DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
193+
exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
194+
}
195+
196+
/**
197+
* A call to the `feed` method of an `lxml` parser.
198+
*/
199+
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
200+
LXMLParserFeedCall() { this.calls(instance(_), "feed") }
201+
202+
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
203+
204+
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
205+
this.calls(instanceVulnerableTo(kind), "feed")
206+
}
207+
208+
override predicate mayExecuteInput() { none() }
209+
210+
override DataFlow::Node getOutput() {
211+
exists(DataFlow::Node objRef |
212+
DataFlow::localFlow(this.getObject(), objRef) and
213+
result.(DataFlow::MethodCallNode).calls(objRef, "close")
214+
)
215+
}
216+
}
217+
}
218+
219+
/**
220+
* A call to either of:
221+
* - `lxml.etree.fromstring`
222+
* - `lxml.etree.fromstringlist`
223+
* - `lxml.etree.XML`
224+
* - `lxml.etree.parse`
225+
* - `lxml.etree.parseid`
226+
*
227+
* See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
228+
*/
229+
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
230+
LXMLParsing() {
231+
this =
232+
API::moduleImport("lxml")
233+
.getMember("etree")
234+
.getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"])
235+
.getACall()
236+
}
237+
238+
override DataFlow::Node getAnInput() {
239+
result in [
240+
this.getArg(0),
241+
// fromstring / XML
242+
this.getArgByName("text"),
243+
// fromstringlist
244+
this.getArgByName("strings"),
245+
// parse / parseid
246+
this.getArgByName("source"),
247+
]
248+
}
249+
250+
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
251+
252+
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
253+
this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
254+
or
255+
kind.isXxe() and
256+
not exists(this.getParserArg())
257+
}
258+
259+
override predicate mayExecuteInput() { none() }
260+
261+
override DataFlow::Node getOutput() { result = this }
262+
}
100263
}

python/ql/src/experimental/semmle/python/frameworks/Xml.qll

Lines changed: 0 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -334,165 +334,6 @@ private module SaxBasedParsing {
334334
}
335335
}
336336

337-
private module Lxml {
338-
/**
339-
* Provides models for `lxml.etree` parsers.
340-
*
341-
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
342-
*/
343-
module XMLParser {
344-
/**
345-
* A source of instances of `lxml.etree` parsers, extend this class to model new instances.
346-
*
347-
* This can include instantiations of the class, return values from function
348-
* calls, or a special parameter that will be set when functions are called by an external
349-
* library.
350-
*
351-
* Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
352-
*/
353-
abstract class InstanceSource extends DataFlow::LocalSourceNode {
354-
/** Holds if this instance is vulnerable to `kind`. */
355-
abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind);
356-
}
357-
358-
/**
359-
* A call to `lxml.etree.XMLParser`.
360-
*
361-
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
362-
*/
363-
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
364-
LXMLParser() {
365-
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
366-
}
367-
368-
// NOTE: it's not possible to change settings of a parser after constructing it
369-
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
370-
kind.isXxe() and
371-
(
372-
// resolve_entities has default True
373-
not exists(this.getArgByName("resolve_entities"))
374-
or
375-
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
376-
)
377-
or
378-
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
379-
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
380-
not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t)
381-
or
382-
kind.isDtdRetrieval() and
383-
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
384-
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
385-
}
386-
}
387-
388-
/**
389-
* A call to `lxml.etree.get_default_parser`.
390-
*
391-
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
392-
*/
393-
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
394-
LXMLDefaultParser() {
395-
this =
396-
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
397-
}
398-
399-
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
400-
// as highlighted by
401-
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
402-
// by default XXE is allow. so as long as the default parser has not been
403-
// overridden, the result is also vuln to XXE.
404-
kind.isXxe()
405-
// TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
406-
}
407-
}
408-
409-
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
410-
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
411-
t.start() and
412-
result = origin
413-
or
414-
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
415-
}
416-
417-
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
418-
DataFlow::Node instance(InstanceSource origin) {
419-
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
420-
}
421-
422-
/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
423-
DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
424-
exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
425-
}
426-
427-
/**
428-
* A call to the `feed` method of an `lxml` parser.
429-
*/
430-
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
431-
LXMLParserFeedCall() { this.calls(instance(_), "feed") }
432-
433-
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
434-
435-
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
436-
this.calls(instanceVulnerableTo(kind), "feed")
437-
}
438-
439-
override predicate mayExecuteInput() { none() }
440-
441-
override DataFlow::Node getOutput() {
442-
exists(DataFlow::Node objRef |
443-
DataFlow::localFlow(this.getObject(), objRef) and
444-
result.(DataFlow::MethodCallNode).calls(objRef, "close")
445-
)
446-
}
447-
}
448-
}
449-
450-
/**
451-
* A call to either of:
452-
* - `lxml.etree.fromstring`
453-
* - `lxml.etree.fromstringlist`
454-
* - `lxml.etree.XML`
455-
* - `lxml.etree.parse`
456-
* - `lxml.etree.parseid`
457-
*
458-
* See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
459-
*/
460-
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
461-
LXMLParsing() {
462-
this =
463-
API::moduleImport("lxml")
464-
.getMember("etree")
465-
.getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"])
466-
.getACall()
467-
}
468-
469-
override DataFlow::Node getAnInput() {
470-
result in [
471-
this.getArg(0),
472-
// fromstring / XML
473-
this.getArgByName("text"),
474-
// fromstringlist
475-
this.getArgByName("strings"),
476-
// parse / parseid
477-
this.getArgByName("source"),
478-
]
479-
}
480-
481-
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
482-
483-
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
484-
this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
485-
or
486-
kind.isXxe() and
487-
not exists(this.getParserArg())
488-
}
489-
490-
override predicate mayExecuteInput() { none() }
491-
492-
override DataFlow::Node getOutput() { result = this }
493-
}
494-
}
495-
496337
private module Xmltodict {
497338
/**
498339
* A call to `xmltodict.parse`.

python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py renamed to python/ql/test/library-tests/frameworks/lxml/parsing.py

File renamed without changes.

python/ql/test/library-tests/frameworks/lxml/xpath.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22
from io import StringIO
33

44
def test_parse():
5-
tree = etree.parse(StringIO('<foo><bar></bar></foo>'))
5+
tree = etree.parse(StringIO('<foo><bar></bar></foo>')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE'
66
r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar'
77

88
def test_XPath_class():
9-
root = etree.XML("<root><a>TEXT</a></root>")
9+
root = etree.XML("<root><a>TEXT</a></root>") # $ decodeFormat=XML decodeInput="<root><a>TEXT</a></root>" decodeOutput=etree.XML(..) xmlVuln='XXE'
1010
find_text = etree.XPath("path") # $ constructedXPath="path"
1111
text = find_text(root)[0]
1212

1313
def test_ETXpath_class():
14-
root = etree.XML("<root><a>TEXT</a></root>")
14+
root = etree.XML("<root><a>TEXT</a></root>") # $ decodeFormat=XML decodeInput="<root><a>TEXT</a></root>" decodeOutput=etree.XML(..) xmlVuln='XXE'
1515
find_text = etree.ETXPath("path") # $ constructedXPath="path"
1616
text = find_text(root)[0]
1717

1818
def test_XPathEvaluator_class():
19-
root = etree.XML("<root><a>TEXT</a></root>")
19+
root = etree.XML("<root><a>TEXT</a></root>") # $ decodeFormat=XML decodeInput="<root><a>TEXT</a></root>" decodeOutput=etree.XML(..) xmlVuln='XXE'
2020
search_root = etree.XPathEvaluator(root)
2121
text = search_root("path")[0] # $ getXPath="path"

0 commit comments

Comments
 (0)