Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 4ee2f49

Browse files
committed
Python: model conversion during unpacking
1 parent d8d8b45 commit 4ee2f49

4 files changed

Lines changed: 386 additions & 82 deletions

File tree

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 188 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ module EssaFlow {
161161
// If expressions
162162
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
163163
or
164+
unpackingAssignmentDirectFlowStep(nodeFrom, nodeTo)
165+
or
164166
// Overflow keyword argument
165167
exists(CallNode call, CallableValue callable |
166168
call = callable.getACall() and
@@ -846,6 +848,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
846848
or
847849
comprehensionStoreStep(nodeFrom, c, nodeTo)
848850
or
851+
unpackingAssignmentStoreStep(nodeFrom, c, nodeTo)
852+
or
849853
attributeStoreStep(nodeFrom, c, nodeTo)
850854
or
851855
posOverflowStoreStep(nodeFrom, c, nodeTo)
@@ -1010,77 +1014,197 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
10101014
)
10111015
}
10121016

1013-
predicate unpackingAssignmentRead(CfgNode nodeFrom, Content c, ControlFlowNode readNode) {
1014-
// `a, b = iterable`
1015-
// nodeFrom = `iterable`
1016-
// readNode = `a`
1017-
// c is compatible with type of `a, b` (so tuple if it was `(a, b)`)
1018-
exists(Assign assign, SequenceNode target, int index | target.getNode() = assign.getATarget() |
1019-
nodeFrom.asExpr() = assign.getValue() and
1020-
readNode = target.getElement(index) and
1021-
(
1022-
target instanceof ListNode and
1023-
c instanceof ListElementContent
1024-
or
1025-
target instanceof TupleNode and
1026-
c.(TupleElementContent).getIndex() = index
1017+
module unpackinAssignment {
1018+
/** Data flows from an iterable to an assigned variable. */
1019+
predicate unpackingAssignmentReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1020+
unpackingAssignmentToplevelReadStep(nodeFrom, c, nodeTo)
1021+
or
1022+
unpackingAssignmentInternalReadStep(nodeFrom, c, nodeTo)
1023+
or
1024+
unpackingAssignmentConvertingReadStep(nodeFrom, c, nodeTo)
1025+
or
1026+
unpackingAssignmentConvertingInternalReadStep(nodeFrom, c, nodeTo)
1027+
}
1028+
1029+
predicate unpackingAssignmentStoreStep(Node nodeFrom, Content c, CfgNode nodeTo) {
1030+
unpackingAssignmentConvertingStoreStep(nodeFrom, c, nodeTo)
1031+
or
1032+
unpackingAssignmentConvertingInternalStoreStep(nodeFrom, c, nodeTo)
1033+
}
1034+
1035+
predicate unpackingAssignmentRead(CfgNode nodeFrom, Content c, ControlFlowNode readNode) {
1036+
// `a, b = iterable`
1037+
// nodeFrom = `a, b`
1038+
// readNode = `a`
1039+
// c is compatible with type of `a, b` (so tuple if it was `(a, b)`)
1040+
exists(Assign assign, SequenceNode target, int index | target.getNode() = assign.getATarget() |
1041+
nodeFrom.getNode() = target and
1042+
readNode = target.getElement(index) and
1043+
(
1044+
target instanceof ListNode and
1045+
c instanceof ListElementContent
1046+
or
1047+
target instanceof TupleNode and
1048+
c.(TupleElementContent).getIndex() = index
1049+
)
10271050
)
1028-
)
1029-
}
1051+
}
10301052

1031-
predicate unpackingAssignmentInternalReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1032-
// iterable unpacking
1033-
// `a, (b, (c, d)) = iterable`
1034-
// nodeFrom is `(b, (c, d))`, cfg node
1035-
// nodeTo is `b`, essa var
1036-
// or `(c, d)`, cfg node
1037-
// c is compatible with `b`s (or `(c, d)`s) index
1038-
exists(
1039-
Assign assign, SequenceNode target, SequenceNode readFrom, int index, ControlFlowNode readTo
1040-
|
1041-
target.getNode() = assign.getATarget() and
1042-
readFrom = target.getAnElement() // use contains to get deeper nesting
1043-
|
1044-
nodeFrom.getNode() = readFrom and
1045-
readTo = readFrom.getElement(index) and
1046-
(
1047-
readTo instanceof SequenceNode and
1048-
nodeTo.asCfgNode() = readTo
1049-
or
1050-
not readTo instanceof SequenceNode and
1051-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = readTo
1052-
) and
1053-
(
1054-
readFrom instanceof ListNode and
1055-
c instanceof ListElementContent
1056-
or
1057-
readFrom instanceof TupleNode and
1058-
c.(TupleElementContent).getIndex() = index
1053+
predicate unpackingAssignmentInternalReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1054+
// iterable unpacking
1055+
// `a, (b, (c, d)) = iterable`
1056+
// nodeFrom is `(b, (c, d))`, cfg node
1057+
// nodeTo is `b`, essa var
1058+
// or `(c, d)`, cfg node
1059+
// c is compatible with `b`s (or `(c, d)`s) index
1060+
exists(
1061+
Assign assign, SequenceNode target, SequenceNode readFrom, int index, ControlFlowNode readTo
1062+
|
1063+
target.getNode() = assign.getATarget() and
1064+
readFrom = target.getAnElement() // use contains to get deeper nesting
1065+
|
1066+
nodeFrom.getNode() = readFrom and
1067+
readTo = readFrom.getElement(index) and
1068+
(
1069+
readTo instanceof SequenceNode and
1070+
nodeTo.asCfgNode() = readTo
1071+
or
1072+
not readTo instanceof SequenceNode and
1073+
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = readTo
1074+
) and
1075+
(
1076+
readFrom instanceof ListNode and
1077+
c instanceof ListElementContent
1078+
or
1079+
readFrom instanceof TupleNode and
1080+
c.(TupleElementContent).getIndex() = index
1081+
)
10591082
)
1060-
)
1061-
}
1083+
}
10621084

1063-
/** Data flows from an iterable to an assigned variable. */
1064-
predicate unpackingAssignmentReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1065-
// iterable unpacking
1066-
// `a, (b, c) = iterable`
1067-
// nodeFrom is `iterable`, cfg node
1068-
// nodeTo is `a`, essa var
1069-
// or `(b, c)`, cfg node
1070-
// c is compatible with `a`s (or `(b, c)`s) index
1071-
exists(ControlFlowNode readNode | unpackingAssignmentRead(nodeFrom, c, readNode) |
1072-
(
1073-
readNode instanceof SequenceNode and
1074-
nodeTo.asCfgNode() = readNode
1075-
or
1076-
not readNode instanceof SequenceNode and
1077-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = readNode
1085+
/** Data flows from an iterable to an assigned variable. */
1086+
predicate unpackingAssignmentToplevelReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1087+
// iterable unpacking
1088+
// `a, (b, c) = iterable`
1089+
// nodeFrom is `a, (b, c)`, cfg node
1090+
// nodeTo is `a`, essa var
1091+
// or `(b, c)`, cfg node
1092+
// c is compatible with `a`s (or `(b, c)`s) index
1093+
exists(ControlFlowNode readNode | unpackingAssignmentRead(nodeFrom, c, readNode) |
1094+
(
1095+
readNode instanceof SequenceNode and
1096+
nodeTo.asCfgNode() = readNode
1097+
or
1098+
not readNode instanceof SequenceNode and
1099+
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = readNode
1100+
)
10781101
)
1079-
)
1080-
or
1081-
unpackingAssignmentInternalReadStep(nodeFrom, c, nodeTo)
1102+
or
1103+
unpackingAssignmentInternalReadStep(nodeFrom, c, nodeTo)
1104+
}
1105+
1106+
predicate unpackingAssignmentDirectFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
1107+
// `a, b = iterable`
1108+
// nodeFrom = `iterable`
1109+
// nodeTo = `a, b`
1110+
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
1111+
nodeFrom.asExpr() = assign.getValue() and
1112+
nodeTo.getNode() = target
1113+
)
1114+
}
1115+
1116+
predicate unpackingAssignmentConvertingReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1117+
// iterable unpacking
1118+
// `a, b = iterable`
1119+
// nodeFrom is `iterable`
1120+
// nodeTo is synthetic IterableElement
1121+
// c is whatever element content `iterable` might carry
1122+
// we wish to consume c, so that we can later write it back in the type of the lhs.
1123+
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
1124+
nodeFrom.asExpr() = assign.getValue() and
1125+
nodeTo = TIterableElement(target) and
1126+
(
1127+
c instanceof ListElementContent
1128+
or
1129+
c instanceof SetElementContent
1130+
or
1131+
// do not lose precision by routing tuple content through the `IterableElement`
1132+
not target instanceof TupleNode and
1133+
// `index` is an element index of the tuple content carried by `nodeFrom`; only indices that also occur in `target` are relevant.
1134+
exists(int index | exists(target.getElement(index)) |
1135+
c.(TupleElementContent).getIndex() = index
1136+
)
1137+
// leaving out dict content for now
1138+
)
1139+
)
1140+
}
1141+
1142+
predicate unpackingAssignmentConvertingStoreStep(Node nodeFrom, Content c, CfgNode nodeTo) {
1143+
// iterable unpacking
1144+
// `a, b = iterable`
1145+
// nodeFrom is synthetic IterableElement
1146+
// nodeTo is `a, b`
1147+
// c is consistent with the type of the lhs.
1148+
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
1149+
nodeFrom = TIterableElement(target) and
1150+
nodeTo.getNode() = target and
1151+
(
1152+
target instanceof ListNode and
1153+
c instanceof ListElementContent
1154+
or
1155+
target instanceof TupleNode and
1156+
exists(int index | exists(target.getElement(index)) |
1157+
c.(TupleElementContent).getIndex() = index
1158+
)
1159+
)
1160+
)
1161+
}
1162+
1163+
predicate unpackingAssignmentConvertingInternalReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
1164+
exists(Assign assign, SequenceNode target, SequenceNode readFrom |
1165+
target.getNode() = assign.getATarget() and
1166+
readFrom = target.getAnElement() // use contains to get deeper nesting
1167+
|
1168+
nodeFrom.getNode() = readFrom and
1169+
nodeTo = TIterableElement(readFrom) and
1170+
(
1171+
c instanceof ListElementContent
1172+
or
1173+
c instanceof SetElementContent
1174+
or
1175+
// do not lose precision by routing tuple content through the `IterableElement`
1176+
not readFrom instanceof TupleNode and
1177+
// `index` is an element index of the tuple content carried by `nodeFrom`; only indices that also occur in `readFrom` are relevant.
1178+
exists(int index | exists(readFrom.getElement(index)) |
1179+
c.(TupleElementContent).getIndex() = index
1180+
)
1181+
// leaving out dict content for now
1182+
)
1183+
)
1184+
}
1185+
1186+
predicate unpackingAssignmentConvertingInternalStoreStep(Node nodeFrom, Content c, CfgNode nodeTo) {
1187+
exists(Assign assign, SequenceNode target, SequenceNode readFrom |
1188+
target.getNode() = assign.getATarget() and
1189+
readFrom = target.getAnElement() // use contains to get deeper nesting
1190+
|
1191+
nodeFrom = TIterableElement(readFrom) and
1192+
nodeTo.getNode() = readFrom and
1193+
(
1194+
readFrom instanceof ListNode and
1195+
c instanceof ListElementContent
1196+
or
1197+
readFrom instanceof TupleNode and
1198+
exists(int index | exists(readFrom.getElement(index)) |
1199+
c.(TupleElementContent).getIndex() = index
1200+
)
1201+
)
1202+
)
1203+
}
10821204
}
10831205

1206+
import unpackinAssignment
1207+
10841208
/** Data flows from a sequence to a call to `pop` on the sequence. */
10851209
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
10861210
// set.pop or list.pop

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@ newtype TNode =
6060
*/
6161
TKwUnpacked(CallNode call, CallableValue callable, string name) {
6262
call_unpacks(call, _, callable, name, _)
63+
} or
64+
/**
65+
* A synthetic node representing that there may be an iterable element
66+
* for `consumer` to consume.
67+
*/
68+
TIterableElement(SequenceNode consumer) {
69+
exists(Assign assign, SequenceNode target | target.getNode() = assign.getATarget() |
70+
consumer = target
71+
or
72+
consumer = target.getAnElement() // use contains for deeper nesting
73+
)
6374
}
6475

6576
/** Helper for `Node::getEnclosingCallable`. */
@@ -319,6 +330,20 @@ class KwUnpacked extends Node, TKwUnpacked {
319330
override Location getLocation() { result = call.getLocation() }
320331
}
321332

333+
/**
334+
* A synthetic node representing an iterable element. Used for changing content type,
335+
* for instance from a `ListElement` to a `TupleElement`.
336+
*/
337+
class IterableElement extends Node, TIterableElement {
338+
SequenceNode consumer;
339+
340+
IterableElement() { this = TIterableElement(consumer) }
341+
342+
override string toString() { result = "IterableElement" }
343+
344+
override Location getLocation() { result = consumer.getLocation() }
345+
}
346+
322347
/**
323348
* A node that controls whether other nodes are evaluated.
324349
*/

0 commit comments

Comments
 (0)