Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e77ceaf

Browse files
committed
Python: Track dictionary keys
Also, less hacky comprehension, but I think we still want to fix the extractor
1 parent 7c23559 commit e77ceaf

3 files changed

Lines changed: 118 additions & 36 deletions

File tree

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,23 +230,31 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
230230
// `[..., 42, ...]`
231231
// nodeFrom is `42`, cfg node
232232
// nodeTo is the sequence, say `[..., 42, ...]`, cfg node
233+
// c denotes list or c denotes tuple and index of 42
233234
//
234235
// List
235236
nodeTo.(CfgNode).getNode().(ListNode).getAnElement() = nodeFrom.(CfgNode).getNode() and
236237
c instanceof ListElementContent
237238
or
238239
// Tuple
239240
exists(int n |
240-
nodeTo.(CfgNode).getNode().(TupleNode).getNode().(Tuple).getElt(n) = nodeFrom.(CfgNode).getNode().getNode() and
241-
c.(TupleElementContent).getIndex() = n and
242-
nodeFrom.(CfgNode).getNode().(NameNode).getId() = "SOURCE"
241+
nodeTo.(CfgNode).getNode().(TupleNode).getElement(n) = nodeFrom.(CfgNode).getNode() and
242+
c.(TupleElementContent).getIndex() = n
243+
)
244+
or
245+
// Dict
246+
exists(KeyValuePair item |
247+
item = nodeTo.(CfgNode).getNode().(DictNode).getNode().(Dict).getAnItem() and
248+
nodeFrom.(CfgNode).getNode().getNode() = item.getValue() and
249+
c.(DictionaryElementContent).getKey() = item.getKey().(StrConst).getS()
243250
)
244251
or
245252
//
246253
// Comprehension
247254
// `[x+1 for x in l]`
248255
// nodeFrom is `x+1`, cfg node
249256
// nodeTo is `[x+1 for x in l]`, cfg node
257+
// c denotes list or set or dictionary without index
250258
//
251259
// List
252260
nodeTo.(CfgNode).getNode().getNode().(ListComp).getElt() = nodeFrom.(CfgNode).getNode().getNode() and
@@ -269,6 +277,7 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
269277
// `l[3]`
270278
// nodeFrom is `l`, cfg node
271279
// nodeTo is `l[3]`, cfg node
280+
// c is compatible with 3
272281
nodeFrom.(CfgNode).getNode() = nodeTo.(CfgNode).getNode().(SubscriptNode).getObject() and
273282
(
274283
c instanceof ListElementContent
@@ -277,18 +286,22 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
277286
or
278287
c instanceof DictionaryElementAnyContent
279288
or
280-
c.(TupleElementContent).getIndex() = nodeTo.(CfgNode).getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
289+
c.(TupleElementContent).getIndex() =
290+
nodeTo.(CfgNode).getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
281291
or
282-
c.(DictionaryElementContent).getKey() = nodeTo.(CfgNode).getNode().(SubscriptNode).getIndex().getNode().(StrConst).getS()
292+
c.(DictionaryElementContent).getKey() =
293+
nodeTo.(CfgNode).getNode().(SubscriptNode).getIndex().getNode().(StrConst).getS()
283294
)
284295
or
285-
// set.pop
296+
// set.pop or list.pop
286297
// `s.pop()`
287298
// nodeFrom is `s`, cfg node
288299
// nodeTo is `s.pop()`, cfg node
300+
// c denotes list or set
289301
exists(CallNode call, AttrNode a |
290302
call.getFunction() = a and
291303
a.getName() = "pop" and // TODO: Should be made more robust, like Value::named("set.pop").getACall()
304+
not exists(call.getAnArg()) and
292305
nodeFrom.(CfgNode).getNode() = a.getObject() and
293306
nodeTo.(CfgNode).getNode() = call and
294307
(
@@ -298,16 +311,51 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
298311
)
299312
)
300313
or
314+
// dict.pop
315+
// `d.pop(key)`
316+
// nodeFrom is `d`, cfg node
317+
// nodeTo is `d.pop(key)`, cfg node
318+
// c denotes key
319+
exists(CallNode call, AttrNode a |
320+
call.getFunction() = a and
321+
a.getName() = "pop" and // TODO: Should be made more robust, like Value::named("dict.pop").getACall()
322+
nodeFrom.(CfgNode).getNode() = a.getObject() and
323+
nodeTo.(CfgNode).getNode() = call and
324+
c.(DictionaryElementContent).getKey() = call.getArg(0).getNode().(StrConst).getS()
325+
)
326+
or
301327
// Comprehension
302328
// `[x+1 for x in l]`
303329
// nodeFrom is `l`, cfg node
304330
// nodeTo is `x`, essa var
331+
// c denotes list or set
305332
exists(For f, Comp comp |
306-
// Seems to need extractor changes to write this part properly
307-
nodeFrom.(CfgNode).getNode().(SequenceNode).getNode().getParentNode() = comp and
308-
colocated(f.getIter(), comp) and
333+
f = getCompFor(comp) and
334+
nodeFrom.(CfgNode).getNode().(SequenceNode).getNode() = getCompIter(comp) and
309335
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getDefiningNode().getNode() =
310-
f.getTarget()
336+
f.getTarget() and
337+
(
338+
c instanceof ListElementContent
339+
or
340+
c instanceof SetElementContent
341+
)
342+
)
343+
}
344+
345+
/** Gets a `For` contained in comprehension `c` whose scope is the function of `c`. This seems to compensate for extractor shortcomings. */
346+
For getCompFor(Comp c) {
347+
c.contains(result) and
348+
c.getFunction() = result.getScope()
349+
}
350+
351+
/** This seems to compensate for extractor shortcomings */
352+
AstNode getCompIter(Comp c) {
353+
c.contains(result) and
354+
c.getScope() = result.getScope() and
355+
not result = c.getFunction() and
356+
not exists(AstNode between |
357+
c.contains(between) and
358+
between.contains(result)
311359
)
312360
}
313361

python/ql/test/experimental/dataflow/coverage/dataflow.expected

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,21 @@ edges
3030
| test.py:95:21:95:28 | ControlFlowNode for List [List element] | test.py:95:16:95:16 | SSA variable y |
3131
| test.py:95:22:95:27 | ControlFlowNode for SOURCE | test.py:95:21:95:28 | ControlFlowNode for List [List element] |
3232
| test.py:96:10:96:10 | ControlFlowNode for x [Set element] | test.py:96:10:96:16 | ControlFlowNode for Attribute() |
33-
| test.py:222:11:222:16 | ControlFlowNode for SOURCE | test.py:222:11:222:17 | ControlFlowNode for Tuple [Tuple element at 0] |
34-
| test.py:222:11:222:17 | ControlFlowNode for Tuple [Tuple element at 0] | test.py:222:10:222:21 | ControlFlowNode for Subscript |
35-
| test.py:225:10:225:17 | ControlFlowNode for List [List element] | test.py:225:10:225:20 | ControlFlowNode for Subscript |
36-
| test.py:225:11:225:16 | ControlFlowNode for SOURCE | test.py:225:10:225:17 | ControlFlowNode for List [List element] |
37-
| test.py:246:28:246:33 | ControlFlowNode for SOURCE | test.py:246:10:246:34 | ControlFlowNode for second() |
38-
| test.py:305:12:305:17 | ControlFlowNode for SOURCE | test.py:305:10:305:18 | ControlFlowNode for f() |
39-
| test.py:309:28:309:33 | ControlFlowNode for SOURCE | test.py:309:10:309:34 | ControlFlowNode for second() |
33+
| test.py:104:9:104:21 | ControlFlowNode for Dict [Dictionary element at s] | test.py:105:10:105:10 | ControlFlowNode for x [Dictionary element at s] |
34+
| test.py:104:15:104:20 | ControlFlowNode for SOURCE | test.py:104:9:104:21 | ControlFlowNode for Dict [Dictionary element at s] |
35+
| test.py:105:10:105:10 | ControlFlowNode for x [Dictionary element at s] | test.py:105:10:105:15 | ControlFlowNode for Subscript |
36+
| test.py:108:9:108:21 | ControlFlowNode for Dict [Dictionary element at s] | test.py:109:10:109:10 | ControlFlowNode for x [Dictionary element at s] |
37+
| test.py:108:15:108:20 | ControlFlowNode for SOURCE | test.py:108:9:108:21 | ControlFlowNode for Dict [Dictionary element at s] |
38+
| test.py:109:10:109:10 | ControlFlowNode for x [Dictionary element at s] | test.py:109:10:109:19 | ControlFlowNode for Attribute() |
39+
| test.py:234:11:234:16 | ControlFlowNode for SOURCE | test.py:234:11:234:17 | ControlFlowNode for Tuple [Tuple element at 0] |
40+
| test.py:234:11:234:17 | ControlFlowNode for Tuple [Tuple element at 0] | test.py:234:10:234:21 | ControlFlowNode for Subscript |
41+
| test.py:237:10:237:17 | ControlFlowNode for List [List element] | test.py:237:10:237:20 | ControlFlowNode for Subscript |
42+
| test.py:237:11:237:16 | ControlFlowNode for SOURCE | test.py:237:10:237:17 | ControlFlowNode for List [List element] |
43+
| test.py:240:10:240:21 | ControlFlowNode for Dict [Dictionary element at s] | test.py:240:10:240:26 | ControlFlowNode for Subscript |
44+
| test.py:240:15:240:20 | ControlFlowNode for SOURCE | test.py:240:10:240:21 | ControlFlowNode for Dict [Dictionary element at s] |
45+
| test.py:258:28:258:33 | ControlFlowNode for SOURCE | test.py:258:10:258:34 | ControlFlowNode for second() |
46+
| test.py:317:12:317:17 | ControlFlowNode for SOURCE | test.py:317:10:317:18 | ControlFlowNode for f() |
47+
| test.py:321:28:321:33 | ControlFlowNode for SOURCE | test.py:321:10:321:34 | ControlFlowNode for second() |
4048
nodes
4149
| test.py:24:10:24:26 | ControlFlowNode for Tuple [Tuple element at 1] | semmle.label | ControlFlowNode for Tuple [Tuple element at 1] |
4250
| test.py:24:21:24:26 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
@@ -81,18 +89,29 @@ nodes
8189
| test.py:95:22:95:27 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
8290
| test.py:96:10:96:10 | ControlFlowNode for x [Set element] | semmle.label | ControlFlowNode for x [Set element] |
8391
| test.py:96:10:96:16 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
84-
| test.py:222:10:222:21 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
85-
| test.py:222:11:222:16 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
86-
| test.py:222:11:222:17 | ControlFlowNode for Tuple [Tuple element at 0] | semmle.label | ControlFlowNode for Tuple [Tuple element at 0] |
87-
| test.py:225:10:225:17 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
88-
| test.py:225:10:225:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
89-
| test.py:225:11:225:16 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
90-
| test.py:246:10:246:34 | ControlFlowNode for second() | semmle.label | ControlFlowNode for second() |
91-
| test.py:246:28:246:33 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
92-
| test.py:305:10:305:18 | ControlFlowNode for f() | semmle.label | ControlFlowNode for f() |
93-
| test.py:305:12:305:17 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
94-
| test.py:309:10:309:34 | ControlFlowNode for second() | semmle.label | ControlFlowNode for second() |
95-
| test.py:309:28:309:33 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
92+
| test.py:104:9:104:21 | ControlFlowNode for Dict [Dictionary element at s] | semmle.label | ControlFlowNode for Dict [Dictionary element at s] |
93+
| test.py:104:15:104:20 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
94+
| test.py:105:10:105:10 | ControlFlowNode for x [Dictionary element at s] | semmle.label | ControlFlowNode for x [Dictionary element at s] |
95+
| test.py:105:10:105:15 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
96+
| test.py:108:9:108:21 | ControlFlowNode for Dict [Dictionary element at s] | semmle.label | ControlFlowNode for Dict [Dictionary element at s] |
97+
| test.py:108:15:108:20 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
98+
| test.py:109:10:109:10 | ControlFlowNode for x [Dictionary element at s] | semmle.label | ControlFlowNode for x [Dictionary element at s] |
99+
| test.py:109:10:109:19 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
100+
| test.py:234:10:234:21 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
101+
| test.py:234:11:234:16 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
102+
| test.py:234:11:234:17 | ControlFlowNode for Tuple [Tuple element at 0] | semmle.label | ControlFlowNode for Tuple [Tuple element at 0] |
103+
| test.py:237:10:237:17 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
104+
| test.py:237:10:237:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
105+
| test.py:237:11:237:16 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
106+
| test.py:240:10:240:21 | ControlFlowNode for Dict [Dictionary element at s] | semmle.label | ControlFlowNode for Dict [Dictionary element at s] |
107+
| test.py:240:10:240:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
108+
| test.py:240:15:240:20 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
109+
| test.py:258:10:258:34 | ControlFlowNode for second() | semmle.label | ControlFlowNode for second() |
110+
| test.py:258:28:258:33 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
111+
| test.py:317:10:317:18 | ControlFlowNode for f() | semmle.label | ControlFlowNode for f() |
112+
| test.py:317:12:317:17 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
113+
| test.py:321:10:321:34 | ControlFlowNode for second() | semmle.label | ControlFlowNode for second() |
114+
| test.py:321:28:321:33 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
96115
#select
97116
| test.py:26:10:26:10 | ControlFlowNode for y | test.py:24:21:24:26 | ControlFlowNode for SOURCE | test.py:26:10:26:10 | ControlFlowNode for y | <message> |
98117
| test.py:36:10:36:10 | ControlFlowNode for x | test.py:35:9:35:14 | ControlFlowNode for SOURCE | test.py:36:10:36:10 | ControlFlowNode for x | <message> |
@@ -106,8 +125,11 @@ nodes
106125
| test.py:79:10:79:13 | ControlFlowNode for Subscript | test.py:78:22:78:27 | ControlFlowNode for SOURCE | test.py:79:10:79:13 | ControlFlowNode for Subscript | <message> |
107126
| test.py:92:10:92:16 | ControlFlowNode for Attribute() | test.py:91:10:91:15 | ControlFlowNode for SOURCE | test.py:92:10:92:16 | ControlFlowNode for Attribute() | <message> |
108127
| test.py:96:10:96:16 | ControlFlowNode for Attribute() | test.py:95:22:95:27 | ControlFlowNode for SOURCE | test.py:96:10:96:16 | ControlFlowNode for Attribute() | <message> |
109-
| test.py:222:10:222:21 | ControlFlowNode for Subscript | test.py:222:11:222:16 | ControlFlowNode for SOURCE | test.py:222:10:222:21 | ControlFlowNode for Subscript | <message> |
110-
| test.py:225:10:225:20 | ControlFlowNode for Subscript | test.py:225:11:225:16 | ControlFlowNode for SOURCE | test.py:225:10:225:20 | ControlFlowNode for Subscript | <message> |
111-
| test.py:246:10:246:34 | ControlFlowNode for second() | test.py:246:28:246:33 | ControlFlowNode for SOURCE | test.py:246:10:246:34 | ControlFlowNode for second() | <message> |
112-
| test.py:305:10:305:18 | ControlFlowNode for f() | test.py:305:12:305:17 | ControlFlowNode for SOURCE | test.py:305:10:305:18 | ControlFlowNode for f() | <message> |
113-
| test.py:309:10:309:34 | ControlFlowNode for second() | test.py:309:28:309:33 | ControlFlowNode for SOURCE | test.py:309:10:309:34 | ControlFlowNode for second() | <message> |
128+
| test.py:105:10:105:15 | ControlFlowNode for Subscript | test.py:104:15:104:20 | ControlFlowNode for SOURCE | test.py:105:10:105:15 | ControlFlowNode for Subscript | <message> |
129+
| test.py:109:10:109:19 | ControlFlowNode for Attribute() | test.py:108:15:108:20 | ControlFlowNode for SOURCE | test.py:109:10:109:19 | ControlFlowNode for Attribute() | <message> |
130+
| test.py:234:10:234:21 | ControlFlowNode for Subscript | test.py:234:11:234:16 | ControlFlowNode for SOURCE | test.py:234:10:234:21 | ControlFlowNode for Subscript | <message> |
131+
| test.py:237:10:237:20 | ControlFlowNode for Subscript | test.py:237:11:237:16 | ControlFlowNode for SOURCE | test.py:237:10:237:20 | ControlFlowNode for Subscript | <message> |
132+
| test.py:240:10:240:26 | ControlFlowNode for Subscript | test.py:240:15:240:20 | ControlFlowNode for SOURCE | test.py:240:10:240:26 | ControlFlowNode for Subscript | <message> |
133+
| test.py:258:10:258:34 | ControlFlowNode for second() | test.py:258:28:258:33 | ControlFlowNode for SOURCE | test.py:258:10:258:34 | ControlFlowNode for second() | <message> |
134+
| test.py:317:10:317:18 | ControlFlowNode for f() | test.py:317:12:317:17 | ControlFlowNode for SOURCE | test.py:317:10:317:18 | ControlFlowNode for f() | <message> |
135+
| test.py:321:10:321:34 | ControlFlowNode for second() | test.py:321:28:321:33 | ControlFlowNode for SOURCE | test.py:321:10:321:34 | ControlFlowNode for second() | <message> |

python/ql/test/experimental/dataflow/coverage/test.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,28 @@ def test_nested_set_display():
102102
# 6.2.7. Dictionary displays
103103
def test_dict_display():
104104
x = {"s": SOURCE}
105-
SINK(x["s"]) # Flow missing
105+
SINK(x["s"])
106+
107+
def test_dict_display_pop():
108+
x = {"s": SOURCE}
109+
SINK(x.pop("s"))
106110

107111
def test_dict_comprehension():
108112
x = {y: SOURCE for y in ["s"]}
109113
SINK(x["s"]) # Flow missing
110114

115+
def test_dict_comprehension_pop():
116+
x = {y: SOURCE for y in ["s"]}
117+
SINK(x.pop("s")) # Flow missing
118+
111119
def test_nested_dict_display():
112120
x = {** {"s": SOURCE}}
113121
SINK(x["s"]) # Flow missing
114122

123+
def test_nested_dict_display_pop():
124+
x = {** {"s": SOURCE}}
125+
SINK(x.pop("s")) # Flow missing
126+
115127
# 6.2.8. Generator expressions
116128
def test_generator():
117129
x = (SOURCE for y in [NONSOURCE])
@@ -225,7 +237,7 @@ def test_subscription_list():
225237
SINK([SOURCE][0])
226238

227239
def test_subscription_mapping():
228-
SINK({"s":SOURCE}["s"]) # Flow missing
240+
SINK({"s":SOURCE}["s"])
229241

230242
# overriding __getitem__ should be tested by the class coverage tests
231243

0 commit comments

Comments
 (0)