Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c661f43

Browse files
committed
Python: Port use-use implementation from Java
1 parent a1cec12 commit c661f43

7 files changed

Lines changed: 199 additions & 67 deletions

File tree

python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
private import python
22
private import DataFlowPublic
33
import semmle.python.SpecialMethods
4+
private import semmle.python.essa.SsaCompute
45

56
//--------
67
// Data flow graph
@@ -97,12 +98,19 @@ module EssaFlow {
9798
contextManager.strictlyDominates(var)
9899
)
99100
or
100-
// Use
101+
// First use after definition
101102
// `y = 42`
102103
// `x = f(y)`
103104
// nodeFrom is `y` on first line, essa var
104105
// nodeTo is `y` on second line, cfg node
105-
nodeFrom.(EssaNode).getVar().getASourceUse() = nodeTo.(CfgNode).getNode()
106+
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
107+
or
108+
// Next use after use
109+
// `x = f(y)`
110+
// `z = y + 1`
111+
// nodeFrom is `y` on first line, cfg node
112+
// nodeTo is `y` on second line, cfg node
113+
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
106114
or
107115
// Refinements
108116
exists(EssaEdgeRefinement r |
@@ -120,6 +128,14 @@ module EssaFlow {
120128
nodeFrom.(EssaNode).getVar() = p.getAnInput()
121129
)
122130
}
131+
132+
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
133+
AdjacentUses::adjacentUseUseSameVar(nodeFrom, nodeTo)
134+
}
135+
136+
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
137+
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
138+
}
123139
}
124140

125141
//--------
@@ -136,6 +152,10 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
136152
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
137153
}
138154

155+
/**
156+
* If a node `n` has a post-update node `pun(n)`, we want forward flow to go from
157+
* `pun(n)` rather than from `n`.
158+
*/
139159
private Node update(Node node) {
140160
exists(PostUpdateNode pun |
141161
node = pun.getPreUpdateNode() and

python/ql/src/semmle/python/essa/SsaCompute.qll

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,141 @@ private module SsaComputeImpl {
348348
)
349349
}
350350
}
351+
352+
cached
353+
module AdjacentUsesImpl {
354+
/**
355+
* Holds if `rankix` is the rank of the index `i` at which there is an SSA definition or explicit use of
356+
* `v` in the basic block `b`.
357+
*/
358+
cached
359+
predicate defSourceUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
360+
i = rank[rankix](int j | variableDefine(v, _, b, j) or variableSourceUse(v, _, b, j))
361+
}
362+
363+
/** Holds if `use` is a source use of `v` at node `j` of `b`, where `i = 2 * j`. */
364+
cached
365+
predicate variableSourceUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
366+
v.getASourceUse() = use and
367+
exists(int j |
368+
b.getNode(j) = use and
369+
i = 2 * j
370+
)
371+
}
372+
373+
/** Gets the maximum rank index for the given variable and basic block. */
374+
private int lastSourceUseRank(SsaSourceVariable v, BasicBlock b) {
375+
result = max(int rankix | defSourceUseRank(v, b, rankix, _))
376+
}
377+
378+
/** Holds if `v` is defined or used in `b`. */
379+
private predicate varOccursInBlock(SsaSourceVariable v, BasicBlock b) {
380+
defSourceUseRank(v, b, _, _)
381+
}
382+
383+
/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
384+
private predicate blockPrecedesVar(SsaSourceVariable v, BasicBlock b) {
385+
varOccursInBlock(v, b.getASuccessor*())
386+
}
387+
388+
/**
389+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` [and
390+
* in `b2` or one of its transitive successors]? but not in any block on the path
391+
* between `b1` and `b2`.
392+
*/
393+
private predicate varBlockReaches(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
394+
varOccursInBlock(v, b1) and b2 = b1.getASuccessor()
395+
or
396+
exists(BasicBlock mid |
397+
varBlockReaches(v, b1, mid) and
398+
b2 = mid.getASuccessor() and
399+
not varOccursInBlock(v, mid) and
400+
blockPrecedesVar(v, b2)
401+
)
402+
}
403+
404+
/**
405+
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
406+
* `b2` but not in any block on the path between `b1` and `b2`.
407+
*/
408+
private predicate varBlockStep(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
409+
varBlockReaches(v, b1, b2) and
410+
varOccursInBlock(v, b2)
411+
}
412+
413+
/**
414+
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
415+
* there is a path between them without any occurrence of `v`.
416+
*/
417+
cached
418+
predicate adjacentVarRefs(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2) {
419+
exists(int rankix |
420+
b1 = b2 and
421+
defSourceUseRank(v, b1, rankix, i1) and
422+
defSourceUseRank(v, b2, rankix + 1, i2)
423+
)
424+
or
425+
defSourceUseRank(v, b1, lastSourceUseRank(v, b1), i1) and
426+
varBlockStep(v, b1, b2) and
427+
defSourceUseRank(v, b2, 1, i2)
428+
}
429+
430+
/**
431+
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
432+
* variable, that is, the value read in `use1` can reach `use2` without passing
433+
* through any other use or any SSA definition of the variable.
434+
*/
435+
cached
436+
predicate adjacentUseUseSameVar(ControlFlowNode use1, ControlFlowNode use2) {
437+
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
438+
adjacentVarRefs(v, b1, i1, b2, i2) and
439+
variableUse(v, use1, b1, i1) and
440+
variableUse(v, use2, b2, i2)
441+
)
442+
}
443+
444+
/**
445+
* Holds if the value defined at `def` can reach `use` without passing through
446+
* any other uses, but possibly through phi nodes.
447+
*/
448+
cached
449+
predicate firstUse(EssaDefinition def, ControlFlowNode use) {
450+
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
451+
adjacentVarRefs(v, b1, i1, b2, i2) and
452+
definesAt(def, v, b1, i1) and
453+
variableUse(v, use, b2, i2)
454+
)
455+
or
456+
exists(
457+
SsaSourceVariable v, EssaDefinition redef, BasicBlock b1, int i1, BasicBlock b2, int i2
458+
|
459+
redef instanceof PhiFunction
460+
|
461+
adjacentVarRefs(v, b1, i1, b2, i2) and
462+
definesAt(def, v, b1, i1) and
463+
definesAt(redef, v, b2, i2) and
464+
firstUse(redef, use)
465+
)
466+
}
467+
468+
cached
469+
predicate definesAt(EssaDefinition def, SsaSourceVariable v, BasicBlock b, int i) {
470+
exists(ControlFlowNode defNode |
471+
def.(EssaNodeDefinition).definedBy(v, defNode) and
472+
variableDefine(v, defNode, b, i)
473+
)
474+
or
475+
v = def.(PhiFunction).getSourceVariable() and
476+
b = def.(PhiFunction).getBasicBlock() and
477+
i = -1
478+
}
479+
}
351480
}
352481

353482
import SsaComputeImpl::SsaDefinitionsImpl as SsaDefinitions
354483
import SsaComputeImpl::EssaDefinitionsImpl as EssaDefinitions
355484
import SsaComputeImpl::LivenessImpl as Liveness
485+
import SsaComputeImpl::AdjacentUsesImpl as AdjacentUses
356486

357487
/* This is exported primarily for testing */
358488
/*

python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ uniqueEnclosingCallable
55
| module.py:2:8:2:13 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
66
| module.py:5:1:5:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
77
| module.py:5:5:5:18 | GSSA Variable dangerous_func | Node should have one enclosing callable but has 0. |
8+
| module.py:9:9:9:14 | ControlFlowNode for SOURCE | Node should have one enclosing callable but has 0. |
89
| module.py:10:1:10:5 | GSSA Variable safe2 | Node should have one enclosing callable but has 0. |
910
| module.py:10:9:10:14 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
1011
| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |

python/ql/test/experimental/dataflow/coverage/dataflow.expected

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
11
edges
2-
| datamodel.py:13:1:13:6 | GSSA Variable SOURCE | datamodel.py:38:6:38:17 | GSSA Variable SOURCE |
32
| datamodel.py:13:1:13:6 | GSSA Variable SOURCE | datamodel.py:38:8:38:13 | ControlFlowNode for SOURCE |
43
| datamodel.py:13:10:13:17 | ControlFlowNode for Str | datamodel.py:13:1:13:6 | GSSA Variable SOURCE |
5-
| datamodel.py:38:6:38:17 | GSSA Variable SOURCE | datamodel.py:71:6:71:24 | GSSA Variable SOURCE |
6-
| datamodel.py:38:6:38:17 | GSSA Variable SOURCE | datamodel.py:71:15:71:20 | ControlFlowNode for SOURCE |
74
| datamodel.py:38:8:38:13 | ControlFlowNode for SOURCE | datamodel.py:38:6:38:17 | ControlFlowNode for f() |
8-
| datamodel.py:71:6:71:24 | GSSA Variable SOURCE | datamodel.py:72:18:72:23 | ControlFlowNode for SOURCE |
9-
| datamodel.py:71:6:71:24 | GSSA Variable SOURCE | datamodel.py:80:6:80:26 | GSSA Variable SOURCE |
10-
| datamodel.py:71:6:71:24 | GSSA Variable SOURCE | datamodel.py:80:20:80:25 | ControlFlowNode for SOURCE |
115
| datamodel.py:71:15:71:20 | ControlFlowNode for SOURCE | datamodel.py:71:6:71:24 | ControlFlowNode for Attribute() |
126
| datamodel.py:72:18:72:23 | ControlFlowNode for SOURCE | datamodel.py:72:6:72:27 | ControlFlowNode for Attribute() |
13-
| datamodel.py:80:6:80:26 | GSSA Variable SOURCE | datamodel.py:81:20:81:25 | ControlFlowNode for SOURCE |
147
| datamodel.py:80:20:80:25 | ControlFlowNode for SOURCE | datamodel.py:80:6:80:26 | ControlFlowNode for Attribute() |
158
| datamodel.py:81:20:81:25 | ControlFlowNode for SOURCE | datamodel.py:81:6:81:26 | ControlFlowNode for Attribute() |
169
| test.py:32:10:32:26 | ControlFlowNode for Tuple [Tuple element at index 1] | test.py:33:9:33:9 | ControlFlowNode for x [Tuple element at index 1] |
@@ -77,20 +70,16 @@ edges
7770
| test.py:335:12:335:17 | ControlFlowNode for SOURCE | test.py:335:10:335:18 | ControlFlowNode for f() |
7871
| test.py:339:28:339:33 | ControlFlowNode for SOURCE | test.py:339:10:339:34 | ControlFlowNode for second() |
7972
| test.py:372:9:372:14 | ControlFlowNode for SOURCE | test.py:374:10:374:10 | ControlFlowNode for a |
80-
| test.py:372:9:372:14 | ControlFlowNode for SOURCE | test.py:379:10:379:10 | ControlFlowNode for b |
8173
nodes
8274
| datamodel.py:13:1:13:6 | GSSA Variable SOURCE | semmle.label | GSSA Variable SOURCE |
8375
| datamodel.py:13:10:13:17 | ControlFlowNode for Str | semmle.label | ControlFlowNode for Str |
8476
| datamodel.py:38:6:38:17 | ControlFlowNode for f() | semmle.label | ControlFlowNode for f() |
85-
| datamodel.py:38:6:38:17 | GSSA Variable SOURCE | semmle.label | GSSA Variable SOURCE |
8677
| datamodel.py:38:8:38:13 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
8778
| datamodel.py:71:6:71:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
88-
| datamodel.py:71:6:71:24 | GSSA Variable SOURCE | semmle.label | GSSA Variable SOURCE |
8979
| datamodel.py:71:15:71:20 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
9080
| datamodel.py:72:6:72:27 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
9181
| datamodel.py:72:18:72:23 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
9282
| datamodel.py:80:6:80:26 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
93-
| datamodel.py:80:6:80:26 | GSSA Variable SOURCE | semmle.label | GSSA Variable SOURCE |
9483
| datamodel.py:80:20:80:25 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
9584
| datamodel.py:81:6:81:26 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
9685
| datamodel.py:81:20:81:25 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
@@ -182,17 +171,12 @@ nodes
182171
| test.py:339:28:339:33 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
183172
| test.py:372:9:372:14 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
184173
| test.py:374:10:374:10 | ControlFlowNode for a | semmle.label | ControlFlowNode for a |
185-
| test.py:379:10:379:10 | ControlFlowNode for b | semmle.label | ControlFlowNode for b |
186174
#select
187175
| datamodel.py:38:6:38:17 | ControlFlowNode for f() | datamodel.py:13:10:13:17 | ControlFlowNode for Str | datamodel.py:38:6:38:17 | ControlFlowNode for f() | <message> |
188176
| datamodel.py:38:6:38:17 | ControlFlowNode for f() | datamodel.py:38:8:38:13 | ControlFlowNode for SOURCE | datamodel.py:38:6:38:17 | ControlFlowNode for f() | <message> |
189-
| datamodel.py:71:6:71:24 | ControlFlowNode for Attribute() | datamodel.py:13:10:13:17 | ControlFlowNode for Str | datamodel.py:71:6:71:24 | ControlFlowNode for Attribute() | <message> |
190177
| datamodel.py:71:6:71:24 | ControlFlowNode for Attribute() | datamodel.py:71:15:71:20 | ControlFlowNode for SOURCE | datamodel.py:71:6:71:24 | ControlFlowNode for Attribute() | <message> |
191-
| datamodel.py:72:6:72:27 | ControlFlowNode for Attribute() | datamodel.py:13:10:13:17 | ControlFlowNode for Str | datamodel.py:72:6:72:27 | ControlFlowNode for Attribute() | <message> |
192178
| datamodel.py:72:6:72:27 | ControlFlowNode for Attribute() | datamodel.py:72:18:72:23 | ControlFlowNode for SOURCE | datamodel.py:72:6:72:27 | ControlFlowNode for Attribute() | <message> |
193-
| datamodel.py:80:6:80:26 | ControlFlowNode for Attribute() | datamodel.py:13:10:13:17 | ControlFlowNode for Str | datamodel.py:80:6:80:26 | ControlFlowNode for Attribute() | <message> |
194179
| datamodel.py:80:6:80:26 | ControlFlowNode for Attribute() | datamodel.py:80:20:80:25 | ControlFlowNode for SOURCE | datamodel.py:80:6:80:26 | ControlFlowNode for Attribute() | <message> |
195-
| datamodel.py:81:6:81:26 | ControlFlowNode for Attribute() | datamodel.py:13:10:13:17 | ControlFlowNode for Str | datamodel.py:81:6:81:26 | ControlFlowNode for Attribute() | <message> |
196180
| datamodel.py:81:6:81:26 | ControlFlowNode for Attribute() | datamodel.py:81:20:81:25 | ControlFlowNode for SOURCE | datamodel.py:81:6:81:26 | ControlFlowNode for Attribute() | <message> |
197181
| test.py:34:10:34:10 | ControlFlowNode for y | test.py:32:21:32:26 | ControlFlowNode for SOURCE | test.py:34:10:34:10 | ControlFlowNode for y | <message> |
198182
| test.py:44:10:44:10 | ControlFlowNode for x | test.py:43:9:43:14 | ControlFlowNode for SOURCE | test.py:44:10:44:10 | ControlFlowNode for x | <message> |
@@ -218,4 +202,3 @@ nodes
218202
| test.py:335:10:335:18 | ControlFlowNode for f() | test.py:335:12:335:17 | ControlFlowNode for SOURCE | test.py:335:10:335:18 | ControlFlowNode for f() | <message> |
219203
| test.py:339:10:339:34 | ControlFlowNode for second() | test.py:339:28:339:33 | ControlFlowNode for SOURCE | test.py:339:10:339:34 | ControlFlowNode for second() | <message> |
220204
| test.py:374:10:374:10 | ControlFlowNode for a | test.py:372:9:372:14 | ControlFlowNode for SOURCE | test.py:374:10:374:10 | ControlFlowNode for a | <message> |
221-
| test.py:379:10:379:10 | ControlFlowNode for b | test.py:372:9:372:14 | ControlFlowNode for SOURCE | test.py:379:10:379:10 | ControlFlowNode for b | <message> |

python/ql/test/experimental/dataflow/regression/dataflow.expected

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,3 @@
1515
| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:182:16:182:16 | ControlFlowNode for t |
1616
| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:184:16:184:16 | ControlFlowNode for t |
1717
| test.py:178:9:178:14 | ControlFlowNode for SOURCE | test.py:186:14:186:14 | ControlFlowNode for t |
18-
| test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:197:14:197:14 | ControlFlowNode for t |
19-
| test.py:195:9:195:14 | ControlFlowNode for SOURCE | test.py:199:14:199:14 | ControlFlowNode for t |

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/TestTaint.expected

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
22
| test_collections.py:24 | ok | list_clear | tainted_list |
3-
| test_collections.py:27 | fail | list_clear | tainted_list |
3+
| test_collections.py:27 | ok | list_clear | tainted_list |
44
| test_string.py:17 | ok | str_methods | ts.casefold() |
5-
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
6-
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
5+
| test_string.py:19 | fail | str_methods | ts.format_map(..) |
6+
| test_string.py:20 | fail | str_methods | "{unsafe}".format_map(..) |
77
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
88
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
99
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |

0 commit comments

Comments
 (0)