Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e112697

Browse files
committed
Python: Promote xml.sax and xml.dom.* modeling
1 parent 05bb0ef commit e112697

4 files changed

Lines changed: 214 additions & 210 deletions

File tree

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3346,6 +3346,220 @@ private module StdlibPrivate {
33463346

33473347
// The path argument(s) of this node are exactly the node(s) given by `getAnInput()`.
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
33483348
}
3349+
3350+
// ---------------------------------------------------------------------------
3351+
// xml.sax
3352+
// ---------------------------------------------------------------------------
3353+
/**
 * A `setFeature(...)` method call whose receiver is an XML SAX parser returned by
 * `xml.sax.make_parser()`.
 *
 * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
 */
private class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
  SaxParserSetFeatureCall() {
    exists(API::Node saxParser |
      saxParser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn() and
      this = saxParser.getMember("setFeature").getACall()
    )
  }

  // The keyword argument names used below do not match the documentation. The
  // original author checked (with Python 3.9.5) that these names actually work.
  /** Gets the argument selecting the feature: first positional, or keyword `name`. */
  DataFlow::Node getFeatureArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("name")
  }

  /** Gets the argument giving the new state: second positional, or keyword `state`. */
  DataFlow::Node getStateArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("state")
  }
}
3375+
3376+
/**
 * Gets a back-reference to `arg`, the state argument of some `setFeature` call on a
 * SAX parser, where `t` is the state of the type back-tracker at the returned node.
 */
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
  DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
  // Recursive case: take one more step backwards from a node that was already reached.
  exists(DataFlow::TypeBackTracker tPrev |
    result = saxParserSetFeatureStateArgBacktracker(tPrev, arg).backtrack(tPrev, t)
  )
  or
  // Base case: begin at a local source of the state argument itself.
  result = arg.getALocalSource() and
  arg = any(SaxParserSetFeatureCall setFeature).getStateArg() and
  t.start()
}
3388+
3389+
/**
 * Gets a back-reference to the `setFeature` state argument `arg`, i.e. a node the
 * type back-tracker has reached once it is in its end state.
 */
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
  exists(DataFlow::TypeBackTracker finished |
    finished = DataFlow::TypeBackTracker::end() and
    result = saxParserSetFeatureStateArgBacktracker(finished, arg)
  )
}
3393+
3394+
/**
 * Holds if `call` passes a reference to `xml.sax.handler.feature_external_ges` as its
 * feature argument, and its state argument back-tracks to a boolean literal with
 * value `state`.
 */
private predicate saxParserSetsFeatureExternalGes(SaxParserSetFeatureCall call, boolean state) {
  call.getFeatureArg() =
    API::moduleImport("xml")
        .getMember("sax")
        .getMember("handler")
        .getMember("feature_external_ges")
        .getAUse() and
  state =
    saxParserSetFeatureStateArgBacktracker(call.getStateArg())
        .asExpr()
        .(BooleanLiteral)
        .booleanValue()
}

/**
 * Gets a reference to an XML SAX parser that has `feature_external_ges` turned on,
 * where `t` is the state of the type tracker at the returned node.
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
  // Start: the receiver of a `setFeature(feature_external_ges, True)` call.
  t.start() and
  exists(SaxParserSetFeatureCall enabling |
    saxParserSetsFeatureExternalGes(enabling, true) and
    result = enabling.getObject()
  )
  or
  // Step: follow the parser one small step further ...
  exists(DataFlow::TypeTracker tPrev |
    t = tPrev.smallstep(saxParserWithFeatureExternalGesTurnedOn(tPrev), result)
  ) and
  // ... unless the node reached is the receiver of a call that sets the feature to
  // `False`, since that makes the parser safe again.
  not exists(SaxParserSetFeatureCall disabling |
    disabling.getObject() = result and
    saxParserSetsFeatureExternalGes(disabling, false)
  )
}
3433+
3434+
/**
 * Gets a reference to an XML SAX parser on which `feature_external_ges` has been
 * turned on (type tracker in its end state).
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
  exists(DataFlow::TypeTracker finished |
    finished = DataFlow::TypeTracker::end() and
    result = saxParserWithFeatureExternalGesTurnedOn(finished)
  )
}
3442+
3443+
/**
 * A call to the `parse` method on a SAX XML parser obtained from
 * `xml.sax.make_parser()`.
 */
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
  XMLSaxInstanceParsing() {
    exists(API::Node saxParser |
      saxParser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn() and
      this = saxParser.getMember("parse").getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // always vulnerable to these two kinds
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // only vulnerable to these kinds when the feature has been turned on for the
    // parser object that `parse` is called on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getObject() = saxParserWithFeatureExternalGesTurnedOn()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // Parsing with SAX delivers the data to the content handler rather than
    // returning it. That is not modeled for now: it is not trivial to do and would
    // not give much value at the moment.
    none()
  }
}
3477+
3478+
/**
 * A call to the module-level `parse` or `parseString` function from the `xml.sax`
 * module.
 *
 * These are plain functions, not methods on a parser object: each call creates its
 * own SAX parser, so there is no user-configured parser whose features could have
 * been changed before the call.
 *
 * See:
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
 */
private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLSaxParsing() {
    this =
      API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
  }

  override DataFlow::Node getAnInput() {
    result in [
        this.getArg(0),
        // parseString
        this.getArgByName("string"),
        // parse
        this.getArgByName("source"),
      ]
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // Always vulnerable to these.
    //
    // XXE and DTD retrieval are deliberately not reported here. They require
    // `feature_external_ges` to have been turned on for the parser doing the parsing,
    // and the "object" of an `xml.sax.parse(...)` call is the `xml.sax` module, never
    // such a parser.
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getOutput() {
    // note: the output of parsing with SAX is that the content handler gets the
    // data... but we don't currently model this (it's not trivial to do, and won't
    // really give us any value, at least not as of right now).
    none()
  }
}
3519+
3520+
// ---------------------------------------------------------------------------
3521+
// xml.dom.*
3522+
// ---------------------------------------------------------------------------
3523+
/**
 * A call to `parse` or `parseString` from either `xml.dom.minidom` or
 * `xml.dom.pulldom`.
 *
 * Both of these modules are based on SAX parsers.
 */
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLDomParsing() {
    exists(API::Node domModule |
      domModule = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"]) and
      this = domModule.getMember(["parse", "parseString"]).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // parseString
    result = this.getArgByName("string")
    or
    // minidom.parse
    result = this.getArgByName("file")
    or
    // pulldom.parse
    result = this.getArgByName("stream_or_string")
  }

  /** Gets the argument supplying a parser: second positional, or keyword `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
    // always vulnerable to these two kinds
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // only vulnerable to these kinds when the supplied parser has had the feature
    // turned on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn()
  }

  override predicate mayExecuteInput() { none() }

  // The call node itself is the output of the parsing.
  override DataFlow::Node getOutput() { result = this }
}
33493563
}
33503564

33513565
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)