@@ -127,9 +127,14 @@ module EssaFlow {
127127 nodeTo .( EssaNode ) .getVar ( ) .getDefinition ( ) .( AssignmentDefinition ) .getValue ( )
128128 or
129129 // Definition
130- // `a, b = iterable`
130+ // `[a, b] = iterable`
131131 // nodeFrom = `iterable`, cfg node
132- // nodeTo = `a, b`, cfg node
132+ // nodeTo = `TIterableSequence([a, b])`
133+ exists ( UnpackingAssignmentDirectTarget target |
134+ nodeFrom .asExpr ( ) = target .getValue ( ) and
135+ nodeTo = TIterableSequence ( target )
136+ )
137+ or
133138 exists ( Assign assign , SequenceNode target | target .getNode ( ) = assign .getATarget ( ) |
134139 nodeFrom .asExpr ( ) = assign .getValue ( ) and
135140 nodeTo .asCfgNode ( ) = target
@@ -170,7 +175,7 @@ module EssaFlow {
170175 // If expressions
171176 nodeFrom .asCfgNode ( ) = nodeTo .asCfgNode ( ) .( IfExprNode ) .getAnOperand ( )
172177 or
173- unpackingAssignmentDirectFlowStep ( nodeFrom , nodeTo )
178+ unpackingAssignmentFlowStep ( nodeFrom , nodeTo )
174179 or
175180 // Overflow keyword argument
176181 exists ( CallNode call , CallableValue callable |
@@ -1023,115 +1028,117 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
10231028 )
10241029}
10251030
1031+ /**
1032+ * The unpacking assignment takes the general form
1033+ * ```python
1034+ * sequence = iterable
1035+ * ```
1036+ * where `sequence` is either a tuple or a list and it can contain wildcards.
1037+ * The iterable can be any iterable, which means that content will need to change type
1038+ * if it should be transferred from the RHS to the LHS.
1039+ *
1040+ * We may for instance have
1041+ * ```python
1042+ * (a, b) = ["a", "tainted string"] # RHS has content `ListElement`
1043+ * ```
1044+ * Due to the abstraction for list content, we do not know whether `"tainted string"`
1045+ * ends up in `a` or in `b`, so we want to overapproximate and see it in both.
1046+ *
1047+ * Using wildcards we may have
1048+ * ```python
1049+ * (a, *b) = ("a", "b", "tainted string") # RHS has content `TupleElement(2)`
1050+ * ```
1051+ * Since the starred variables are always assigned type list, `*b` will be
1052+ * `["b", "tainted string"]`, and we will again overapproximate and assign it
1053+ * content corresponding to anything found in the RHS.
1054+ *
1055+ * For a precise transfer
1056+ * ```python
1057+ * (a, b) = ("a", "tainted string") # RHS has content `TupleElement(1)`
1058+ * ```
1059+ * we wish to keep the precision, so only `b` receives the tuple content at index 1.
1060+ *
1061+ * Finally, `sequence` is actually a pattern and can have a more complicated structure,
1062+ * such as
1063+ * ```python
1064+ * (a, [b, *c]) = ("a", ("tainted string", "c")) # RHS has content `TupleElement(1); TupleElement(0)`
1065+ * ```
1066+ * where `a` should not receive content, but `b` and `c` should. `c` will be `["c"]` so
1067+ * should have the content converted and transferred, while `b` should read it.
1068+ *
1069+ * The strategy for converting content type is to break the transfer up into a read step
1070+ * and a store step, together creating a converting transfer step.
1071+ * For this we need a synthetic node in the middle, which we call `TIterableElement(receiver)`.
1072+ * It is associated with the receiver of the transfer, because we know the receiver type from the syntax.
1073+ * Since we sometimes need a converting read step (in the example above, `[b, *c]` reads the content
1074+ * `TupleElement(0)` but should have content `ListElement`), we actually need a second synthetic node.
1075+ * A converting read step is a read step followed by a converting transfer.
1076+ * We can have a uniform treatment by always having two synthetic nodes and so we can view it as
1077+ * two stages of the same node. So we read into (or transfer to) `TIterableSequence(receiver)`,
1078+ * from which we take a read step to `TIterableElement(receiver)` and then a store step to `receiver`.
1079+ * In order to preserve precise content, we also take a flow step from `TIterableSequence(receiver)`
1080+ * directly to `receiver`.
1081+ *
1082+ * The strategy then proceeds via several read, store, and flow steps:
1083+ * 1. [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
1084+ * flow step. From here, everything happens on the LHS.
1085+ *
1086+ * 1. [Flow] Content is transferred from `TIterableSequence(sequence)` to `sequence` via a
1087+ * flow step.
1088+ *
1089+ * 1. [Read] Content is read from `TIterableSequence(sequence)` into `TIterableElement(sequence)`.
1090+ * If `sequence` is of type tuple, we will not read tuple content as that would allow
1091+ * cross talk.
1092+ *
1093+ * 1. [Store] Content is stored from `TIterableElement(sequence)` to `sequence`.
1094+ * Here the content type is chosen according to the type of sequence.
1095+ *
1096+ * 1. [Read] Content is read from `sequence` to its elements according to the type of `sequence`.
1097+ * If the element is a plain variable, the target is the corresponding essa node.
1098+ * If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
1099+ * If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
1100+ *
1101+ * 1. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
1102+ * content type `ListElement`.
1103+ *
1104+ * 1. [Flow, Read, Store] The last 5 steps are repeated for all recursive elements which are sequences.
1105+ */
10261106module unpackinAssignment {
1027- /** Data flows from an iterable to an assigned variable. */
1028- predicate unpackingAssignmentReadStep ( CfgNode nodeFrom , Content c , Node nodeTo ) {
1029- unpackingAssignmentToplevelReadStep ( nodeFrom , c , nodeTo )
1030- or
1031- unpackingAssignmentInternalReadStep ( nodeFrom , c , nodeTo )
1032- or
1033- unpackingAssignmentConvertingReadStep ( nodeFrom , c , nodeTo )
1034- or
1035- unpackingAssignmentConvertingInternalReadStep ( nodeFrom , c , nodeTo )
1036- }
1107+ /** A direct (or top-level) target of an unpacking assignment */
1108+ class UnpackingAssignmentDirectTarget extends ControlFlowNode {
1109+ Expr value ;
10371110
1038- predicate unpackingAssignmentStoreStep ( Node nodeFrom , Content c , CfgNode nodeTo ) {
1039- unpackingAssignmentConvertingStoreStep ( nodeFrom , c , nodeTo )
1040- or
1041- unpackingAssignmentConvertingInternalStoreStep ( nodeFrom , c , nodeTo )
1042- }
1111+ UnpackingAssignmentDirectTarget ( ) {
1112+ this instanceof SequenceNode and
1113+ exists ( Assign assign | this .getNode ( ) = assign .getATarget ( ) | value = assign .getValue ( ) )
1114+ }
10431115
1044- predicate unpackingAssignmentRead ( CfgNode nodeFrom , Content c , ControlFlowNode readNode ) {
1045- // `a, b = iterable`
1046- // nodeFrom = `a, b`
1047- // readNode = `a`
1048- // c is compatible with type of `a, b` (so tuple if it was `(a, b)`)
1049- exists ( Assign assign , SequenceNode target , int index | target .getNode ( ) = assign .getATarget ( ) |
1050- nodeFrom .getNode ( ) = target and
1051- readNode = target .getElement ( index ) and
1052- (
1053- target instanceof ListNode and
1054- c instanceof ListElementContent
1055- or
1056- target instanceof TupleNode and
1057- c .( TupleElementContent ) .getIndex ( ) = index
1058- )
1059- )
1116+ Expr getValue ( ) { result = value }
10601117 }
10611118
1062- predicate unpackingAssignmentInternalReadStep ( CfgNode nodeFrom , Content c , Node nodeTo ) {
1063- // iterable unpacking
1064- // `a, (b, (c, d)) = iterable`
1065- // nodeFrom is `(b, (c, d))`, cfg node
1066- // nodeTo is `b`, essa var
1067- // or `(c, d)`, cfg node
1068- // c is compatible with `b`s (or `(c, d)`s) index
1069- exists (
1070- Assign assign , SequenceNode target , SequenceNode readFrom , int index , ControlFlowNode readTo
1071- |
1072- target .getNode ( ) = assign .getATarget ( ) and
1073- readFrom = target .getAnElement ( ) // use contains to get deeper nesting
1074- |
1075- nodeFrom .getNode ( ) = readFrom and
1076- readTo = readFrom .getElement ( index ) and
1077- (
1078- readTo instanceof SequenceNode and
1079- nodeTo .asCfgNode ( ) = readTo
1080- or
1081- not readTo instanceof SequenceNode and
1082- nodeTo .asVar ( ) .getDefinition ( ) .( MultiAssignmentDefinition ) .getDefiningNode ( ) = readTo
1083- ) and
1084- (
1085- readFrom instanceof ListNode and
1086- c instanceof ListElementContent
1087- or
1088- readFrom instanceof TupleNode and
1089- c .( TupleElementContent ) .getIndex ( ) = index
1090- )
1091- )
1092- }
1119+ /** A (possibly recursive) target of an unpacking assignment */
1120+ class UnpackingAssignmentTarget extends ControlFlowNode {
1121+ UnpackingAssignmentTarget ( ) {
1122+ this instanceof UnpackingAssignmentDirectTarget
1123+ or
1124+ exists ( UnpackingAssignmentTarget parent | this = parent .getAnElement ( ) )
1125+ }
10931126
1094- /** Data flows from an iterable to an assigned variable. */
1095- predicate unpackingAssignmentToplevelReadStep ( CfgNode nodeFrom , Content c , Node nodeTo ) {
1096- // iterable unpacking
1097- // `a, (b, c) = iterable`
1098- // nodeFrom is `a, (b, c)`, cfg node
1099- // nodeTo is `a`, essa var
1100- // or `(b, c)`, cfg node
1101- // c is compatible with `a`s (or `(b, c)`s) index
1102- exists ( ControlFlowNode readNode | unpackingAssignmentRead ( nodeFrom , c , readNode ) |
1103- (
1104- readNode instanceof SequenceNode and
1105- nodeTo .asCfgNode ( ) = readNode
1106- or
1107- not readNode instanceof SequenceNode and
1108- nodeTo .asVar ( ) .getDefinition ( ) .( MultiAssignmentDefinition ) .getDefiningNode ( ) = readNode
1109- )
1110- )
1111- or
1112- unpackingAssignmentInternalReadStep ( nodeFrom , c , nodeTo )
1127+ ControlFlowNode getElement ( int i ) { result = this .( SequenceNode ) .getElement ( i ) }
1128+
1129+ ControlFlowNode getAnElement ( ) { result = this .getElement ( _) }
11131130 }
11141131
1115- predicate unpackingAssignmentDirectFlowStep ( CfgNode nodeFrom , CfgNode nodeTo ) {
1116- // `a, *b = iterable`
1117- // nodeFrom = `a, b`
1118- // nodeTo = `*b`
1119- exists ( Assign assign , SequenceNode target | target .getNode ( ) = assign .getATarget ( ) |
1120- nodeFrom .getNode ( ) = target and
1121- nodeTo .getNode ( ) = target .getAnElement ( ) and
1122- nodeTo .asExpr ( ) instanceof Starred
1132+ predicate unpackingAssignmentFlowStep ( Node nodeFrom , Node nodeTo ) {
1133+ exists ( UnpackingAssignmentTarget target | target instanceof SequenceNode |
1134+ nodeFrom = TIterableSequence ( target ) and
1135+ nodeTo .asCfgNode ( ) = target
11231136 )
11241137 }
11251138
1126- predicate unpackingAssignmentConvertingReadStep ( CfgNode nodeFrom , Content c , Node nodeTo ) {
1127- // iterable unpacking
1128- // `a, b = iterable`
1129- // nodeFrom is `iterable`
1130- // nodeTo is synthetic IterableElement
1131- // c is whatever element content `iterable` might carry
1132- // we wish to consume c, so that we can later write it back in the type of the lhs.
1133- exists ( Assign assign , SequenceNode target | target .getNode ( ) = assign .getATarget ( ) |
1134- nodeFrom .asExpr ( ) = assign .getValue ( ) and
1139+ predicate unpackingAssignmentConvertingReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1140+ exists ( UnpackingAssignmentTarget target | target instanceof SequenceNode |
1141+ nodeFrom = TIterableSequence ( target ) and
11351142 nodeTo = TIterableElement ( target ) and
11361143 (
11371144 c instanceof ListElementContent
@@ -1149,15 +1156,10 @@ module unpackinAssignment {
11491156 )
11501157 }
11511158
1152- predicate unpackingAssignmentConvertingStoreStep ( Node nodeFrom , Content c , CfgNode nodeTo ) {
1153- // iterable unpacking
1154- // `a, b = iterable`
1155- // nodeFrom is synthetic IterableElement
1156- // nodeTo is `a, b`
1157- // c is consistent with the type of the lhs.
1158- exists ( Assign assign , SequenceNode target | target .getNode ( ) = assign .getATarget ( ) |
1159+ predicate unpackingAssignmentConvertingStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1160+ exists ( UnpackingAssignmentTarget target | target instanceof SequenceNode |
11591161 nodeFrom = TIterableElement ( target ) and
1160- nodeTo .getNode ( ) = target and
1162+ nodeTo .asCfgNode ( ) = target and
11611163 (
11621164 target instanceof ListNode and
11631165 c instanceof ListElementContent
@@ -1170,47 +1172,51 @@ module unpackinAssignment {
11701172 )
11711173 }
11721174
1173- predicate unpackingAssignmentConvertingInternalReadStep ( CfgNode nodeFrom , Content c , Node nodeTo ) {
1174- exists ( Assign assign , SequenceNode target , SequenceNode readFrom |
1175- target .getNode ( ) = assign .getATarget ( ) and
1176- readFrom = target .getAnElement ( ) // use contains to get deeper nesting
1175+ predicate unpackingAssignmentElementReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1176+ exists ( UnpackingAssignmentTarget target , int index , ControlFlowNode element |
1177+ target instanceof SequenceNode
11771178 |
1178- nodeFrom .getNode ( ) = readFrom and
1179- nodeTo = TIterableElement ( readFrom ) and
1179+ nodeFrom .asCfgNode ( ) = target and
1180+ element = target . getElement ( index ) and
11801181 (
1182+ target instanceof ListNode and
11811183 c instanceof ListElementContent
11821184 or
1183- c instanceof SetElementContent
1184- or
1185- // do not lose precision by routing tuple content through the `IterableElement`
1186- not readFrom instanceof TupleNode and
1187- // `index` refers to `nodeFrom`, but only the ones in `target` are relevant.
1188- exists ( int index | exists ( readFrom .getElement ( index ) ) |
1189- c .( TupleElementContent ) .getIndex ( ) = index
1190- )
1191- // leaving out dict content for now
1185+ target instanceof TupleNode and
1186+ c .( TupleElementContent ) .getIndex ( ) = index
1187+ ) and
1188+ (
1189+ if element instanceof SequenceNode
1190+ then nodeTo = TIterableSequence ( element )
1191+ else
1192+ if element .getNode ( ) instanceof Starred
1193+ then nodeTo = TIterableElement ( element )
1194+ else
1195+ nodeTo .asVar ( ) .getDefinition ( ) .( MultiAssignmentDefinition ) .getDefiningNode ( ) = element
11921196 )
11931197 )
11941198 }
11951199
1196- predicate unpackingAssignmentConvertingInternalStoreStep ( Node nodeFrom , Content c , CfgNode nodeTo ) {
1197- exists ( Assign assign , SequenceNode target , SequenceNode readFrom |
1198- target .getNode ( ) = assign .getATarget ( ) and
1199- readFrom = target .getAnElement ( ) // use contains to get deeper nesting
1200- |
1201- nodeFrom = TIterableElement ( readFrom ) and
1202- nodeTo .getNode ( ) = readFrom and
1203- (
1204- readFrom instanceof ListNode and
1205- c instanceof ListElementContent
1206- or
1207- readFrom instanceof TupleNode and
1208- exists ( int index | exists ( readFrom .getElement ( index ) ) |
1209- c .( TupleElementContent ) .getIndex ( ) = index
1210- )
1211- )
1200+ predicate unpackingAssignmentStarredElementStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1201+ exists ( ControlFlowNode starred | starred .getNode ( ) instanceof Starred |
1202+ nodeFrom = TIterableElement ( starred ) and
1203+ nodeTo .asVar ( ) .getDefinition ( ) .( MultiAssignmentDefinition ) .getDefiningNode ( ) = starred and
1204+ c instanceof ListElementContent
12121205 )
12131206 }
1207+
1208+ /** Data flows from an iterable to an assigned variable. */
1209+ predicate unpackingAssignmentReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1210+ unpackingAssignmentElementReadStep ( nodeFrom , c , nodeTo )
1211+ or
1212+ unpackingAssignmentConvertingReadStep ( nodeFrom , c , nodeTo )
1213+ }
1214+
1215+ predicate unpackingAssignmentStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1216+ unpackingAssignmentStarredElementStoreStep ( nodeFrom , c , nodeTo )
1217+ or
1218+ unpackingAssignmentConvertingStoreStep ( nodeFrom , c , nodeTo )
1219+ }
12141220}
12151221
12161222import unpackinAssignment
0 commit comments