Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 36a4a50

Browse files
committed
Python: big refactor and fix tests
Make sure tests are valid Fix wrong test annotations Big refactor to make code readable Big comment to explain code
1 parent b2d95e6 commit 36a4a50

4 files changed

Lines changed: 299 additions & 410 deletions

File tree

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 146 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,14 @@ module EssaFlow {
127127
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
128128
or
129129
// Definition
130-
// `a, b = iterable`
130+
// `[a, b] = iterable`
131131
// nodeFrom = `iterable`, cfg node
132-
// nodeTo = `a, b`, cfg node
132+
// nodeTo = `TIterableSequence([a, b])`
133+
exists(UnpackingAssignmentDirectTarget target |
134+
nodeFrom.asExpr() = target.getValue() and
135+
nodeTo = TIterableSequence(target)
136+
)
137+
or
133138
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
134139
nodeFrom.asExpr() = assign.getValue() and
135140
nodeTo.asCfgNode() = target
@@ -170,7 +175,7 @@ module EssaFlow {
170175
// If expressions
171176
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
172177
or
173-
unpackingAssignmentDirectFlowStep(nodeFrom, nodeTo)
178+
unpackingAssignmentFlowStep(nodeFrom, nodeTo)
174179
or
175180
// Overflow keyword argument
176181
exists(CallNode call, CallableValue callable |
@@ -1023,115 +1028,117 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
10231028
)
10241029
}
10251030

1031+
/**
1032+
* The unpacking assignment takes the general form
1033+
* ```python
1034+
* sequence = iterable
1035+
* ```
1036+
* where `sequence` is either a tuple or a list and it can contain wildcards.
1037+
* The iterable can be any iterable, which means that content will need to change type
1038+
* if it should be transferred from the RHS to the LHS.
1039+
*
1040+
* We may for instance have
1041+
* ```python
1042+
* (a, b) = ["a", "tainted string"] # RHS has content `ListElement`
1043+
* ```
1044+
* Due to the abstraction for list content, we do not know whether `"tainted string"`
1045+
* ends up in `a` or in `b`, so we want to overapproximate and see it in both.
1046+
*
1047+
* Using wildcards we may have
1048+
* ```python
1049+
* (a, *b) = ("a", "b", "tainted string") # RHS has content `TupleElement(2)`
1050+
* ```
1051+
* Since the starred variables are always assigned type list, `*b` will be
1052+
* `["b", "tainted string"]`, and we will again overapproximate and assign it
1053+
* content corresponding to anything found in the RHS.
1054+
*
1055+
* For a precise transfer
1056+
* ```python
1057+
* (a, b) = ("a", "tainted string") # RHS has content `TupleElement(1)`
1058+
* ```
1059+
* we wish to keep the precision, so only `b` receives the tuple content at index 1.
1060+
*
1061+
* Finally, `sequence` is actually a pattern and can have a more complicated structure,
1062+
* such as
1063+
* ```python
1064+
* (a, [b, *c]) = ("a", ("tainted string", "c")) # RHS has content `TupleElement(1); TupleElement(0)`
1065+
* ```
1066+
* where `a` should not receive content, but `b` and `c` should. `c` will be `["c"]` so
1067+
* should have the content converted and transferred, while `b` should read it.
1068+
*
1069+
* The strategy for converting content type is to break the transfer up into a read step
1070+
* and a store step, together creating a converting transfer step.
1071+
* For this we need a synthetic node in the middle, which we call `TIterableElement(receiver)`.
1072+
* It is associated with the receiver of the transfer, because we know the receiver type from the syntax.
1073+
* Since we sometimes need a converting read step (in the example above, `[b, *c]` reads the content
1074+
* `TupleElement(0)` but should have content `ListElement`), we actually need a second synthetic node.
1075+
* A converting read step is a read step followed by a converting transfer.
1076+
* We can have a uniform treatment by always having two synthetic nodes and so we can view it as
1077+
* two stages of the same node. So we read into (or transfer to) `TIterableSequence(receiver)`,
1078+
* from which we take a read step to `TIterableElement(receiver)` and then a store step to `receiver`.
1079+
* In order to preserve precise content, we also take a flow step from `TIterableSequence(receiver)`
1080+
* directly to `receiver`.
1081+
*
1082+
* The strategy is then via several read-, store-, and flow steps:
1083+
* 1. [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
1084+
* flow step. From here, everything happens on the LHS.
1085+
*
1086+
* 1. [Flow] Content is transferred from `TIterableSequence(sequence)` to `sequence` via a
1087+
* flow step.
1088+
*
1089+
* 1. [Read] Content is read from `TIterableSequence(sequence)` into `TIterableElement(sequence)`.
1090+
* If `sequence` is of type tuple, we will not read tuple content as that would allow
1091+
* cross talk.
1092+
*
1093+
* 1. [Store] Content is stored from `TIterableElement(sequence)` to `sequence`.
1094+
* Here the content type is chosen according to the type of sequence.
1095+
*
1096+
* 1. [Read] Content is read from `sequence` to its elements according to the type of `sequence`.
1097+
* If the element is a plain variable, the target is the corresponding essa node.
1098+
* If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
1099+
* If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
1100+
*
1101+
* 1. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
1102+
* content type `ListElement`.
1103+
*
1104+
* 1. [Flow, Read, Store] The last 5 steps are repeated for all recursive elements which are sequences.
1105+
*/
10261106
module unpackinAssignment {
1027-
/** Data flows from an iterable to an assigned variable. */
1028-
predicate unpackingAssignmentReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1029-
unpackingAssignmentToplevelReadStep(nodeFrom, c, nodeTo)
1030-
or
1031-
unpackingAssignmentInternalReadStep(nodeFrom, c, nodeTo)
1032-
or
1033-
unpackingAssignmentConvertingReadStep(nodeFrom, c, nodeTo)
1034-
or
1035-
unpackingAssignmentConvertingInternalReadStep(nodeFrom, c, nodeTo)
1036-
}
1107+
/** A direct (or top-level) target of an unpacking assignment */
1108+
class UnpackingAssignmentDirectTarget extends ControlFlowNode {
1109+
Expr value;
10371110

1038-
predicate unpackingAssignmentStoreStep(Node nodeFrom, Content c, CfgNode nodeTo) {
1039-
unpackingAssignmentConvertingStoreStep(nodeFrom, c, nodeTo)
1040-
or
1041-
unpackingAssignmentConvertingInternalStoreStep(nodeFrom, c, nodeTo)
1042-
}
1111+
UnpackingAssignmentDirectTarget() {
1112+
this instanceof SequenceNode and
1113+
exists(Assign assign | this.getNode() = assign.getATarget() | value = assign.getValue())
1114+
}
10431115

1044-
predicate unpackingAssignmentRead(CfgNode nodeFrom, Content c, ControlFlowNode readNode) {
1045-
// `a, b = iterable`
1046-
// nodeFrom = `a, b`
1047-
// readNode = `a`
1048-
// c is compatible with type of `a, b` (so tuple if it was `(a, b)`)
1049-
exists(Assign assign, SequenceNode target, int index | target.getNode() = assign.getATarget() |
1050-
nodeFrom.getNode() = target and
1051-
readNode = target.getElement(index) and
1052-
(
1053-
target instanceof ListNode and
1054-
c instanceof ListElementContent
1055-
or
1056-
target instanceof TupleNode and
1057-
c.(TupleElementContent).getIndex() = index
1058-
)
1059-
)
1116+
Expr getValue() { result = value }
10601117
}
10611118

1062-
predicate unpackingAssignmentInternalReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1063-
// iterable unpacking
1064-
// `a, (b, (c, d)) = iterable`
1065-
// nodeFrom is `(b, (c, d))`, cfg node
1066-
// nodeTo is `b`, essa var
1067-
// or `(c, d)`, cfg node
1068-
// c is compatible with `b`s (or `(c, d)`s) index
1069-
exists(
1070-
Assign assign, SequenceNode target, SequenceNode readFrom, int index, ControlFlowNode readTo
1071-
|
1072-
target.getNode() = assign.getATarget() and
1073-
readFrom = target.getAnElement() // use contains to get deeper nesting
1074-
|
1075-
nodeFrom.getNode() = readFrom and
1076-
readTo = readFrom.getElement(index) and
1077-
(
1078-
readTo instanceof SequenceNode and
1079-
nodeTo.asCfgNode() = readTo
1080-
or
1081-
not readTo instanceof SequenceNode and
1082-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = readTo
1083-
) and
1084-
(
1085-
readFrom instanceof ListNode and
1086-
c instanceof ListElementContent
1087-
or
1088-
readFrom instanceof TupleNode and
1089-
c.(TupleElementContent).getIndex() = index
1090-
)
1091-
)
1092-
}
1119+
/** A (possibly recursive) target of an unpacking assignment */
1120+
class UnpackingAssignmentTarget extends ControlFlowNode {
1121+
UnpackingAssignmentTarget() {
1122+
this instanceof UnpackingAssignmentDirectTarget
1123+
or
1124+
exists(UnpackingAssignmentTarget parent | this = parent.getAnElement())
1125+
}
10931126

1094-
/** Data flows from an iterable to an assigned variable. */
1095-
predicate unpackingAssignmentToplevelReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1096-
// iterable unpacking
1097-
// `a, (b, c) = iterable`
1098-
// nodeFrom is `a, (b, c)`, cfg node
1099-
// nodeTo is `a`, essa var
1100-
// or `(b, c)`, cfg node
1101-
// c is compatible with `a`s (or `(b, c)`s) index
1102-
exists(ControlFlowNode readNode | unpackingAssignmentRead(nodeFrom, c, readNode) |
1103-
(
1104-
readNode instanceof SequenceNode and
1105-
nodeTo.asCfgNode() = readNode
1106-
or
1107-
not readNode instanceof SequenceNode and
1108-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = readNode
1109-
)
1110-
)
1111-
or
1112-
unpackingAssignmentInternalReadStep(nodeFrom, c, nodeTo)
1127+
ControlFlowNode getElement(int i) { result = this.(SequenceNode).getElement(i) }
1128+
1129+
ControlFlowNode getAnElement() { result = this.getElement(_) }
11131130
}
11141131

1115-
predicate unpackingAssignmentDirectFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
1116-
// `a, *b = iterable`
1117-
// nodeFrom = `a, b`
1118-
// nodeTo = `*b`
1119-
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
1120-
nodeFrom.getNode() = target and
1121-
nodeTo.getNode() = target.getAnElement() and
1122-
nodeTo.asExpr() instanceof Starred
1132+
predicate unpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
1133+
exists(UnpackingAssignmentTarget target | target instanceof SequenceNode |
1134+
nodeFrom = TIterableSequence(target) and
1135+
nodeTo.asCfgNode() = target
11231136
)
11241137
}
11251138

1126-
predicate unpackingAssignmentConvertingReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1127-
// iterable unpacking
1128-
// `a, b = iterable`
1129-
// nodeFrom is `iterable`
1130-
// nodeTo is synthetic IterableElement
1131-
// c is whatever element content `iterable` might carry
1132-
// we wish to consume c, so that we can later write it back in the type of the lhs.
1133-
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
1134-
nodeFrom.asExpr() = assign.getValue() and
1139+
predicate unpackingAssignmentConvertingReadStep(Node nodeFrom, Content c, Node nodeTo) {
1140+
exists(UnpackingAssignmentTarget target | target instanceof SequenceNode |
1141+
nodeFrom = TIterableSequence(target) and
11351142
nodeTo = TIterableElement(target) and
11361143
(
11371144
c instanceof ListElementContent
@@ -1149,15 +1156,10 @@ module unpackinAssignment {
11491156
)
11501157
}
11511158

1152-
predicate unpackingAssignmentConvertingStoreStep(Node nodeFrom, Content c, CfgNode nodeTo) {
1153-
// iterable unpacking
1154-
// `a, b = iterable`
1155-
// nodeFrom is synthetic IterableElement
1156-
// nodeTo is `a, b`
1157-
// c is consistent with the type of the lhs.
1158-
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
1159+
predicate unpackingAssignmentConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1160+
exists(UnpackingAssignmentTarget target | target instanceof SequenceNode |
11591161
nodeFrom = TIterableElement(target) and
1160-
nodeTo.getNode() = target and
1162+
nodeTo.asCfgNode() = target and
11611163
(
11621164
target instanceof ListNode and
11631165
c instanceof ListElementContent
@@ -1170,47 +1172,51 @@ module unpackinAssignment {
11701172
)
11711173
}
11721174

1173-
predicate unpackingAssignmentConvertingInternalReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1174-
exists(Assign assign, SequenceNode target, SequenceNode readFrom |
1175-
target.getNode() = assign.getATarget() and
1176-
readFrom = target.getAnElement() // use contains to get deeper nesting
1175+
predicate unpackingAssignmentElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
1176+
exists(UnpackingAssignmentTarget target, int index, ControlFlowNode element |
1177+
target instanceof SequenceNode
11771178
|
1178-
nodeFrom.getNode() = readFrom and
1179-
nodeTo = TIterableElement(readFrom) and
1179+
nodeFrom.asCfgNode() = target and
1180+
element = target.getElement(index) and
11801181
(
1182+
target instanceof ListNode and
11811183
c instanceof ListElementContent
11821184
or
1183-
c instanceof SetElementContent
1184-
or
1185-
// do not lose precision by routing tuple content through the `IterableElement`
1186-
not readFrom instanceof TupleNode and
1187-
// `index` refers to `nodeFrom`, but only the ones in `target` are relevant.
1188-
exists(int index | exists(readFrom.getElement(index)) |
1189-
c.(TupleElementContent).getIndex() = index
1190-
)
1191-
// leaving out dict content for now
1185+
target instanceof TupleNode and
1186+
c.(TupleElementContent).getIndex() = index
1187+
) and
1188+
(
1189+
if element instanceof SequenceNode
1190+
then nodeTo = TIterableSequence(element)
1191+
else
1192+
if element.getNode() instanceof Starred
1193+
then nodeTo = TIterableElement(element)
1194+
else
1195+
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
11921196
)
11931197
)
11941198
}
11951199

1196-
predicate unpackingAssignmentConvertingInternalStoreStep(Node nodeFrom, Content c, CfgNode nodeTo) {
1197-
exists(Assign assign, SequenceNode target, SequenceNode readFrom |
1198-
target.getNode() = assign.getATarget() and
1199-
readFrom = target.getAnElement() // use contains to get deeper nesting
1200-
|
1201-
nodeFrom = TIterableElement(readFrom) and
1202-
nodeTo.getNode() = readFrom and
1203-
(
1204-
readFrom instanceof ListNode and
1205-
c instanceof ListElementContent
1206-
or
1207-
readFrom instanceof TupleNode and
1208-
exists(int index | exists(readFrom.getElement(index)) |
1209-
c.(TupleElementContent).getIndex() = index
1210-
)
1211-
)
1200+
predicate unpackingAssignmentStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1201+
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
1202+
nodeFrom = TIterableElement(starred) and
1203+
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
1204+
c instanceof ListElementContent
12121205
)
12131206
}
1207+
1208+
/** Data flows from an iterable to an assigned variable. */
1209+
predicate unpackingAssignmentReadStep(Node nodeFrom, Content c, Node nodeTo) {
1210+
unpackingAssignmentElementReadStep(nodeFrom, c, nodeTo)
1211+
or
1212+
unpackingAssignmentConvertingReadStep(nodeFrom, c, nodeTo)
1213+
}
1214+
1215+
predicate unpackingAssignmentStoreStep(Node nodeFrom, Content c, Node nodeTo) {
1216+
unpackingAssignmentStarredElementStoreStep(nodeFrom, c, nodeTo)
1217+
or
1218+
unpackingAssignmentConvertingStoreStep(nodeFrom, c, nodeTo)
1219+
}
12141220
}
12151221

12161222
import unpackinAssignment

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,17 +61,15 @@ newtype TNode =
6161
TKwUnpacked(CallNode call, CallableValue callable, string name) {
6262
call_unpacks(call, _, callable, name, _)
6363
} or
64+
/**
65+
* A synthetic node representing that an iterable sequence flows to consumer.
66+
*/
67+
TIterableSequence(UnpackingAssignmentTarget consumer) { consumer instanceof SequenceNode } or
6468
/**
6569
* A synthetic node representing that there may be an iterable element
6670
* for `consumer` to consume.
6771
*/
68-
TIterableElement(SequenceNode consumer) {
69-
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
70-
consumer = target
71-
or
72-
consumer = target.getAnElement() // use containts for deeper nesting
73-
)
74-
}
72+
TIterableElement(UnpackingAssignmentTarget consumer)
7573

7674
/** Helper for `Node::getEnclosingCallable`. */
7775
private DataFlowCallable getCallableScope(Scope s) {
@@ -331,12 +329,26 @@ class KwUnpacked extends Node, TKwUnpacked {
331329
}
332330

333331
/**
334-
* A synthetic node representing an iterable element. Use for changing content type
332+
* A synthetic node representing an iterable sequence. Used for changing content type
335333
* for instance from a `ListElement` to a `TupleElement`.
336334
*/
337-
class IterableElement extends Node, TIterableElement {
335+
class IterableSequence extends Node, TIterableSequence {
338336
SequenceNode consumer;
339337

338+
IterableSequence() { this = TIterableSequence(consumer) }
339+
340+
override string toString() { result = "IterableSequence" }
341+
342+
override Location getLocation() { result = consumer.getLocation() }
343+
}
344+
345+
/**
346+
* A synthetic node representing an iterable element. Used for changing content type
347+
* for instance from a `ListElement` to a `TupleElement`.
348+
*/
349+
class IterableElement extends Node, TIterableElement {
350+
ControlFlowNode consumer;
351+
340352
IterableElement() { this = TIterableElement(consumer) }
341353

342354
override string toString() { result = "IterableElement" }

0 commit comments

Comments
 (0)