Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 7f7758b

Browse files
committed
Python: rewrite xml sax modeling
1 parent 6dd776b commit 7f7758b

2 files changed

Lines changed: 93 additions & 47 deletions

File tree

python/ql/src/experimental/semmle/python/frameworks/Xml.qll

Lines changed: 89 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -64,32 +64,90 @@ private module Xml {
6464
}
6565
}
6666

67-
/** Gets a reference to a `parser` that has been set a `feature`. */
68-
private DataFlow::Node trackSaxFeature(
69-
DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature
67+
/**
68+
* A call to the `setFeature` method on an XML SAX parser.
69+
*
70+
* See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
71+
*/
72+
class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
73+
SaxParserSetFeatureCall() {
74+
this =
75+
API::moduleImport("xml")
76+
.getMember("sax")
77+
.getMember("make_parser")
78+
.getReturn()
79+
.getMember("setFeature")
80+
.getACall()
81+
}
82+
83+
// The keyword argument names do not match the documentation. I checked (with Python
84+
// 3.9.5) that the names used here actually work.
85+
DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] }
86+
87+
DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] }
88+
}
89+
90+
/** Gets a back-reference to the `setFeature` state argument `arg`. */
91+
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
92+
DataFlow::TypeBackTracker t, DataFlow::Node arg
7093
) {
7194
t.start() and
72-
exists(DataFlow::MethodCallNode featureCall |
73-
featureCall = parser.getAMethodCall("setFeature") and
74-
featureCall.getArg(0).getALocalSource() = feature.getAUse() and
75-
featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and
76-
result = featureCall.getObject()
95+
arg = any(SaxParserSetFeatureCall c).getStateArg() and
96+
result = arg.getALocalSource()
97+
or
98+
exists(DataFlow::TypeBackTracker t2 |
99+
result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t)
100+
)
101+
}
102+
103+
/** Gets a back-reference to the `setFeature` state argument `arg`. */
104+
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
105+
result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
106+
}
107+
108+
/** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */
109+
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
110+
t.start() and
111+
exists(SaxParserSetFeatureCall call |
112+
call.getFeatureArg() =
113+
API::moduleImport("xml")
114+
.getMember("sax")
115+
.getMember("handler")
116+
.getMember("feature_external_ges")
117+
.getAUse() and
118+
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
119+
.asExpr()
120+
.(BooleanLiteral)
121+
.booleanValue() = true and
122+
result = call.getObject()
77123
)
78124
or
79125
exists(DataFlow::TypeTracker t2 |
80-
t = t2.smallstep(trackSaxFeature(t2, parser, feature), result)
126+
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
127+
) and
128+
// take into account that the feature can be set to False, which makes the parser safe again
129+
not exists(SaxParserSetFeatureCall call |
130+
call.getObject() = result and
131+
call.getFeatureArg() =
132+
API::moduleImport("xml")
133+
.getMember("sax")
134+
.getMember("handler")
135+
.getMember("feature_external_ges")
136+
.getAUse() and
137+
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
138+
.asExpr()
139+
.(BooleanLiteral)
140+
.booleanValue() = false
81141
)
82142
}
83143

84-
/** Gets a reference to a `parser` that has been set a `feature`. */
85-
DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) {
86-
result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature)
144+
/** Gets a reference to an XML SAX parser that has `feature_external_ges` turned on. */
145+
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
146+
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
87147
}
88148

89149
/**
90-
* Gets a call to `xml.sax.make_parser`.
91-
*
92-
* Given the following example:
150+
* An XML parsing call with a SAX parser.
93151
*
94152
* ```py
95153
* BadHandler = MainHandler()
@@ -99,41 +157,27 @@ private module Xml {
99157
* parser.parse(StringIO(xml_content))
100158
* parsed_xml = BadHandler._result
101159
* ```
102-
*
103-
* * `this` would be `xml.sax.make_parser()`.
104-
* * `getAnInput()`'s result would be `StringIO(xml_content)`.
105-
* * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
106160
*/
107-
private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
108-
XMLSaxParser() {
109-
this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall()
161+
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
162+
XMLSaxParsing() {
163+
this =
164+
API::moduleImport("xml")
165+
.getMember("sax")
166+
.getMember("make_parser")
167+
.getReturn()
168+
.getMember("parse")
169+
.getACall()
110170
}
111171

112-
override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) }
172+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
113173

114174
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
115-
exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature |
116-
handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
117-
parse.calls(trackSaxFeature(this, feature), "parse") and
118-
parse.getArg(0) = this.getAnInput() // enough to avoid FPs?
119-
|
120-
(kind.isXxe() or kind.isDtdRetrieval()) and
121-
feature = handler.getMember("feature_external_ges")
122-
or
123-
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
124-
)
125-
}
126-
127-
predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) {
128-
exists(API::Node handler, API::Node feature |
129-
handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
130-
DataFlow::exprNode(trackSaxFeature(this, feature).asExpr())
131-
.(DataFlow::LocalSourceNode)
132-
.flowsTo(n)
133-
|
134-
(kind.isXxe() or kind.isDtdRetrieval()) and
135-
feature = handler.getMember("feature_external_ges")
136-
)
175+
// always vuln to these
176+
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
177+
or
178+
// can be vulnerable to other things if features have been turned on
179+
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
180+
(kind.isXxe() or kind.isDtdRetrieval())
137181
}
138182
}
139183

python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,10 @@ subpaths
227227
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
228228
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
229229
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
230+
| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
231+
| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
230232
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
231233
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
232-
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
233-
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
234+
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
235+
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
234236
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |

0 commit comments

Comments
 (0)