@@ -3346,6 +3346,220 @@ private module StdlibPrivate {
33463346
33473347 override DataFlow:: Node getAPathArgument ( ) { result = this .getAnInput ( ) }
33483348 }
3349+
3350+ // ---------------------------------------------------------------------------
3351+ // xml.sax
3352+ // ---------------------------------------------------------------------------
/**
 * A call to the `setFeature` method on an object returned by `xml.sax.make_parser`.
 *
 * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
 */
private class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
  SaxParserSetFeatureCall() {
    exists(API::Node parser |
      parser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn()
    |
      this = parser.getMember("setFeature").getACall()
    )
  }

  // NOTE: the keyword argument names used below do not match the documentation. The
  // original author checked (with Python 3.9.5) that these names are the ones that
  // actually work.
  /** Gets the argument selecting the feature: first positional argument or keyword `name`. */
  DataFlow::Node getFeatureArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("name")
  }

  /** Gets the argument giving the new state: second positional argument or keyword `state`. */
  DataFlow::Node getStateArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("state")
  }
}
3375+
/**
 * Gets a node that the `setFeature` state argument `arg` can be back-tracked to,
 * where `t` is the type back-tracker state reached at that node.
 */
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
  DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
  // Base case: start from a local source of the state argument of some `setFeature` call.
  exists(SaxParserSetFeatureCall setFeature | arg = setFeature.getStateArg()) and
  result = arg.getALocalSource() and
  t.start()
  or
  // Recursive case: take one back-tracking step from an already reached node.
  exists(DataFlow::TypeBackTracker later |
    result = saxParserSetFeatureStateArgBacktracker(later, arg).backtrack(later, t)
  )
}
3388+
/**
 * Gets a local source node that the `setFeature` state argument `arg` can be
 * back-tracked to, using the `end()` type back-tracker state.
 */
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
  saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) = result
}
3393+
/**
 * Holds if `call` is a `setFeature` call whose feature argument is a use of
 * `xml.sax.handler.feature_external_ges`, and whose state argument can be
 * back-tracked to a boolean literal with value `state`.
 */
private predicate saxParserSetsFeatureExternalGes(SaxParserSetFeatureCall call, boolean state) {
  call.getFeatureArg() =
    API::moduleImport("xml")
        .getMember("sax")
        .getMember("handler")
        .getMember("feature_external_ges")
        .getAUse() and
  state =
    saxParserSetFeatureStateArgBacktracker(call.getStateArg())
        .asExpr()
        .(BooleanLiteral)
        .booleanValue()
}

/**
 * Gets a reference to an XML SAX parser that has `feature_external_ges` turned on,
 * where `t` is the type-tracker state reached at that reference.
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
  // Base case: the receiver of a `setFeature(feature_external_ges, True)` call.
  t.start() and
  exists(SaxParserSetFeatureCall enabling |
    saxParserSetsFeatureExternalGes(enabling, true) and
    result = enabling.getObject()
  )
  or
  // Recursive case: follow one small type-tracking step ...
  exists(DataFlow::TypeTracker prev |
    t = prev.smallstep(saxParserWithFeatureExternalGesTurnedOn(prev), result)
  ) and
  // ... unless the reached node is the receiver of a call that sets the feature to
  // False, since that makes the parser safe again.
  not exists(SaxParserSetFeatureCall disabling |
    disabling.getObject() = result and
    saxParserSetsFeatureExternalGes(disabling, false)
  )
}
3433+
/**
 * Gets a reference to an XML SAX parser that has `feature_external_ges` turned on,
 * as found by type tracking with the `end()` type-tracker state.
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
  saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) = result
}
3442+
/**
 * A call to the `parse` method on an object returned by `xml.sax.make_parser`.
 */
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
  XMLSaxInstanceParsing() {
    exists(API::Node parser |
      parser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn()
    |
      this = parser.getMember("parse").getACall()
    )
  }

  /** Gets the parsed input: first positional argument or keyword `source`. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // These two kinds apply regardless of parser configuration.
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // The remaining kinds only apply when the receiver is a parser that has had
    // `feature_external_ges` turned on.
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getObject() = saxParserWithFeatureExternalGesTurnedOn()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // With SAX, the parsed data is delivered to the content handler. That is not
    // modeled currently: it is not trivial to do, and would not give much value as
    // of right now.
    none()
  }
}
3477+
/**
 * A call to either the `parse` or the `parseString` function from the `xml.sax` module.
 *
 * See:
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
 */
private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLSaxParsing() {
    this =
      API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
  }

  /** Gets the parsed input: first positional argument, or keyword `string` / `source`. */
  override DataFlow::Node getAnInput() {
    result in [
        this.getArg(0),
        // parseString
        this.getArgByName("string"),
        // parse
        this.getArgByName("source"),
      ]
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // These calls are always considered vulnerable to these two kinds.
    //
    // There is deliberately no XXE / DTD-retrieval case here: these are functions on
    // the `xml.sax` module, so the receiver of the call is the module itself and can
    // never be a parser object that had `feature_external_ges` turned on. Calls on
    // such parser objects are handled by `XMLSaxInstanceParsing`.
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // note: the output of parsing with SAX is that the content handler gets the
    // data... but we don't currently model this (it's not trivial to do, and won't
    // really give us any value, at least not as of right now).
    none()
  }
}
3519+
3520+ // ---------------------------------------------------------------------------
3521+ // xml.dom.*
3522+ // ---------------------------------------------------------------------------
/**
 * A call to the `parse` or `parseString` function from `xml.dom.minidom` or
 * `xml.dom.pulldom`.
 *
 * Both of these modules are based on SAX parsers.
 */
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLDomParsing() {
    exists(API::Node domModule |
      domModule = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"])
    |
      this = domModule.getMember(["parse", "parseString"]).getACall()
    )
  }

  /** Gets the parsed input: first positional argument, or the matching keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // parseString
    result = this.getArgByName("string")
    or
    // minidom.parse
    result = this.getArgByName("file")
    or
    // pulldom.parse
    result = this.getArgByName("stream_or_string")
  }

  /** Gets the explicitly passed parser: second positional argument or keyword `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // These two kinds apply regardless of which parser is used.
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // The remaining kinds only apply when the parser passed in has had
    // `feature_external_ges` turned on.
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn()
  }

  override predicate mayExecuteInput() { none() }

  /** Gets the output of parsing, which is the call itself. */
  override DataFlow::Node getOutput() { result = this }
}
33493563}
33503564
33513565// ---------------------------------------------------------------------------
0 commit comments