@@ -19,6 +19,9 @@ private import semmle.python.ApiGraphs
1919 * - https://lxml.de/tutorial.html
2020 */
2121private module Lxml {
22+ // ---------------------------------------------------------------------------
23+ // XPath
24+ // ---------------------------------------------------------------------------
2225 /**
2326 * A class constructor compiling an XPath expression.
2427 *
@@ -97,4 +100,164 @@ private module Lxml {
97100
98101 override string getName ( ) { result = "lxml.etree" }
99102 }
103+
104+ // ---------------------------------------------------------------------------
105+ // Parsing
106+ // ---------------------------------------------------------------------------
107+ /**
108+ * Provides models for `lxml.etree` parsers.
109+ *
110+ * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
111+ */
112+ module XMLParser {
/**
 * A data-flow node at which an `lxml.etree` parser instance originates.
 *
 * Extend this class to model further origins of parser instances, for example
 * instantiations of the class, return values of function calls, or a special
 * parameter that is set when a function is called by an external library.
 *
 * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
 */
abstract class InstanceSource extends DataFlow::LocalSourceNode {
  /** Holds if a parser originating at this node is vulnerable to `kind`. */
  abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind);
}
126+
/**
 * An instantiation of `lxml.etree.XMLParser`.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
 */
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
  LXMLParser() {
    exists(API::Node etree | etree = API::moduleImport("lxml").getMember("etree") |
      this = etree.getMember("XMLParser").getACall()
    )
  }

  // NOTE: a parser's settings cannot be changed once it has been constructed, so
  // inspecting the keyword arguments of the constructor call is sufficient.
  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // XXE: `resolve_entities` defaults to True, so the parser is affected when the
    // keyword is absent or when a literal `True` flows to it.
    kind.isXxe() and
    (
      this.getArgByName("resolve_entities").getALocalSource().asExpr() instanceof True
      or
      not exists(this.getArgByName("resolve_entities"))
    )
    or
    // Entity-expansion attacks: `huge_tree` must be switched on explicitly, and
    // `resolve_entities` must not be a literal `False`.
    (kind.isQuadraticBlowup() or kind.isBillionLaughs()) and
    this.getArgByName("huge_tree").getALocalSource().asExpr() instanceof True and
    not this.getArgByName("resolve_entities").getALocalSource().asExpr() instanceof False
    or
    // DTD retrieval: needs both `load_dtd=True` and `no_network=False`.
    kind.isDtdRetrieval() and
    this.getArgByName("no_network").getALocalSource().asExpr() instanceof False and
    this.getArgByName("load_dtd").getALocalSource().asExpr() instanceof True
  }
}
156+
/**
 * A call to `lxml.etree.get_default_parser`.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
 */
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
  LXMLDefaultParser() {
    exists(API::Node etree | etree = API::moduleImport("lxml").getMember("etree") |
      this = etree.getMember("get_default_parser").getACall()
    )
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // As documented at
    // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
    // XXE is allowed by default. So as long as the default parser has not been
    // overridden, the returned parser is vulnerable to XXE as well.
    //
    // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
    kind.isXxe()
  }
}
177+
/**
 * Gets a node that an `lxml.etree` parser instance created at `origin` has been
 * tracked to, where `t` is the type-tracker state reached at that node.
 */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
  // Base case: tracking starts at the origin itself.
  result = origin and
  t.start()
  or
  // Step case: extend an already tracked node by one type-tracking step.
  exists(DataFlow::TypeTracker prev, DataFlow::TypeTrackingNode mid |
    mid = instance(prev, origin) and
    result = mid.track(prev, t)
  )
}
185+
/** Gets a reference to an `lxml.etree` parser instance that originates at `origin`. */
DataFlow::Node instance(InstanceSource origin) {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = instance(DataFlow::TypeTracker::end(), origin) and
    tracked.flowsTo(result)
  )
}
190+
/**
 * Gets a reference to an `lxml.etree` parser instance whose origin is
 * vulnerable to `kind`.
 */
DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
  result = instance(any(InstanceSource origin | origin.vulnerableTo(kind)))
}
195+
/**
 * A call to the `feed` method on an `lxml` parser instance.
 *
 * The data handed to `feed` is the parsing input; the parsed result is modeled
 * as the matching call to `close` on the same parser object.
 */
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
  LXMLParserFeedCall() { exists(InstanceSource origin | this.calls(instance(origin), "feed")) }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("data")
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    exists(DataFlow::Node parser | parser = instanceVulnerableTo(kind) |
      this.calls(parser, "feed")
    )
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // The output is a `close()` call on a node that the receiver of this `feed`
    // call flows to locally.
    exists(DataFlow::MethodCallNode closeCall, DataFlow::Node parserRef |
      DataFlow::localFlow(this.getObject(), parserRef) and
      closeCall.calls(parserRef, "close") and
      result = closeCall
    )
  }
}
217+ }
218+
/**
 * A call to one of the `lxml.etree` parsing functions:
 * - `lxml.etree.fromstring`
 * - `lxml.etree.fromstringlist`
 * - `lxml.etree.XML`
 * - `lxml.etree.parse`
 * - `lxml.etree.parseid`
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
 */
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  LXMLParsing() {
    exists(string funcName |
      funcName in ["fromstring", "fromstringlist", "XML", "parse", "parseid"]
    |
      this = API::moduleImport("lxml").getMember("etree").getMember(funcName).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    // The input is always the first positional argument ...
    result = this.getArg(0)
    or
    // ... or the matching keyword: `text` (fromstring / XML), `strings`
    // (fromstringlist), `source` (parse / parseid).
    result = this.getArgByName(["text", "strings", "source"])
  }

  /** Gets the argument supplying the parser, either positional (index 1) or the keyword `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // When no parser is passed, the call is treated as vulnerable to XXE.
    kind.isXxe() and
    not exists(this.getParserArg())
    or
    // Otherwise the call inherits the vulnerabilities of the parser passed to it.
    XMLParser::instanceVulnerableTo(kind) = this.getParserArg()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() { result = this }
}
100263}
0 commit comments