Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a82fa04

Browse files
committed
Python: Add worked example of taint step modeling of external libs
This can't be seen on the example, but I went through quite a lot of iterations before arriving at this fairly simple solution.
1 parent 00ea0ce commit a82fa04

7 files changed

Lines changed: 325 additions & 0 deletions

File tree

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
edges
2+
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
3+
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
4+
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | test.py:41:10:41:12 | ControlFlowNode for val |
5+
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:40:11:40:25 | ControlFlowNode for Attribute() |
6+
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | test.py:54:10:54:12 | ControlFlowNode for val |
7+
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:53:11:53:25 | ControlFlowNode for Attribute() |
8+
| test.py:78:11:78:14 | ControlFlowNode for bm() | test.py:79:10:79:12 | ControlFlowNode for val |
9+
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:78:11:78:14 | ControlFlowNode for bm() |
10+
| test.py:90:11:90:14 | ControlFlowNode for bm() | test.py:91:10:91:12 | ControlFlowNode for val |
11+
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:90:11:90:14 | ControlFlowNode for bm() |
12+
nodes
13+
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
14+
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
15+
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
16+
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
17+
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
18+
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
19+
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
20+
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
21+
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
22+
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
23+
| test.py:78:11:78:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
24+
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
25+
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
26+
| test.py:90:11:90:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
27+
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
28+
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
29+
#select
30+
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (naive): test_simple |
31+
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (naive): test_alias |
32+
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (naive): test_accross_functions |
33+
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested |
34+
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (naive): test_pass_bound_method |
35+
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested_bound_method |
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* @kind path-problem
3+
*/
4+
5+
private import python
6+
private import experimental.dataflow.DataFlow
7+
private import experimental.dataflow.TaintTracking
8+
import DataFlow::PathGraph
9+
import SharedCode
10+
11+
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
12+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
13+
// obj -> obj.get_value()
14+
exists(DataFlow::Node bound_method |
15+
bound_method = myClassGetValue(nodeFrom) and
16+
nodeTo.asCfgNode().(CallNode).getFunction() = bound_method.asCfgNode()
17+
)
18+
}
19+
}
20+
21+
from SharedConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
22+
where config.hasFlowPath(source, sink)
23+
select sink.getNode(), source, sink,
24+
"test flow (naive): " + source.getNode().asCfgNode().getScope().getName()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
edges
2+
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
3+
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
4+
| test.py:39:15:39:17 | SSA variable arg | test.py:41:10:41:12 | ControlFlowNode for val |
5+
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:46:15:46:17 | ControlFlowNode for src |
6+
| test.py:46:15:46:17 | ControlFlowNode for src | test.py:39:15:39:17 | SSA variable arg |
7+
| test.py:52:24:52:26 | SSA variable arg | test.py:54:10:54:12 | ControlFlowNode for val |
8+
| test.py:57:33:57:35 | SSA variable arg | test.py:58:24:58:26 | ControlFlowNode for arg |
9+
| test.py:58:24:58:26 | ControlFlowNode for arg | test.py:52:24:52:26 | SSA variable arg |
10+
| test.py:61:33:61:35 | SSA variable arg | test.py:62:33:62:35 | ControlFlowNode for arg |
11+
| test.py:62:33:62:35 | ControlFlowNode for arg | test.py:57:33:57:35 | SSA variable arg |
12+
| test.py:65:33:65:35 | SSA variable arg | test.py:66:33:66:35 | ControlFlowNode for arg |
13+
| test.py:66:33:66:35 | ControlFlowNode for arg | test.py:61:33:61:35 | SSA variable arg |
14+
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:71:33:71:35 | ControlFlowNode for src |
15+
| test.py:71:33:71:35 | ControlFlowNode for src | test.py:65:33:65:35 | SSA variable arg |
16+
| test.py:77:23:77:24 | SSA variable bm | test.py:79:10:79:12 | ControlFlowNode for val |
17+
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:84:23:84:35 | ControlFlowNode for Attribute |
18+
| test.py:84:23:84:35 | ControlFlowNode for Attribute | test.py:77:23:77:24 | SSA variable bm |
19+
| test.py:89:37:89:38 | SSA variable bm | test.py:91:10:91:12 | ControlFlowNode for val |
20+
| test.py:94:46:94:47 | SSA variable bm | test.py:95:37:95:38 | ControlFlowNode for bm |
21+
| test.py:95:37:95:38 | ControlFlowNode for bm | test.py:89:37:89:38 | SSA variable bm |
22+
| test.py:98:46:98:47 | SSA variable bm | test.py:99:46:99:47 | ControlFlowNode for bm |
23+
| test.py:99:46:99:47 | ControlFlowNode for bm | test.py:94:46:94:47 | SSA variable bm |
24+
| test.py:102:46:102:47 | SSA variable bm | test.py:103:46:103:47 | ControlFlowNode for bm |
25+
| test.py:103:46:103:47 | ControlFlowNode for bm | test.py:98:46:98:47 | SSA variable bm |
26+
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:108:46:108:58 | ControlFlowNode for Attribute |
27+
| test.py:108:46:108:58 | ControlFlowNode for Attribute | test.py:102:46:102:47 | SSA variable bm |
28+
nodes
29+
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
30+
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
31+
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
32+
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
33+
| test.py:39:15:39:17 | SSA variable arg | semmle.label | SSA variable arg |
34+
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
35+
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
36+
| test.py:46:15:46:17 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
37+
| test.py:52:24:52:26 | SSA variable arg | semmle.label | SSA variable arg |
38+
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
39+
| test.py:57:33:57:35 | SSA variable arg | semmle.label | SSA variable arg |
40+
| test.py:58:24:58:26 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
41+
| test.py:61:33:61:35 | SSA variable arg | semmle.label | SSA variable arg |
42+
| test.py:62:33:62:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
43+
| test.py:65:33:65:35 | SSA variable arg | semmle.label | SSA variable arg |
44+
| test.py:66:33:66:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
45+
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
46+
| test.py:71:33:71:35 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
47+
| test.py:77:23:77:24 | SSA variable bm | semmle.label | SSA variable bm |
48+
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
49+
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
50+
| test.py:84:23:84:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
51+
| test.py:89:37:89:38 | SSA variable bm | semmle.label | SSA variable bm |
52+
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
53+
| test.py:94:46:94:47 | SSA variable bm | semmle.label | SSA variable bm |
54+
| test.py:95:37:95:38 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
55+
| test.py:98:46:98:47 | SSA variable bm | semmle.label | SSA variable bm |
56+
| test.py:99:46:99:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
57+
| test.py:102:46:102:47 | SSA variable bm | semmle.label | SSA variable bm |
58+
| test.py:103:46:103:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
59+
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
60+
| test.py:108:46:108:58 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
61+
#select
62+
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (proper): test_simple |
63+
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (proper): test_alias |
64+
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (proper): test_accross_functions |
65+
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested |
66+
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (proper): test_pass_bound_method |
67+
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested_bound_method |
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/**
2+
* @kind path-problem
3+
*/
4+
5+
private import python
6+
private import experimental.dataflow.DataFlow
7+
private import experimental.dataflow.TaintTracking
8+
import DataFlow::PathGraph
9+
import SharedCode
10+
11+
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
12+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
13+
// obj -> obj.get_value
14+
nodeTo.asCfgNode().(AttrNode).getObject("get_value") = nodeFrom.asCfgNode() and
15+
nodeTo = myClassGetValue(_)
16+
or
17+
// get_value -> get_value()
18+
nodeFrom = myClassGetValue(_) and
19+
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
20+
}
21+
}
22+
23+
from SharedConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
24+
where config.hasFlowPath(source, sink)
25+
select sink.getNode(), source, sink,
26+
"test flow (proper): " + source.getNode().asCfgNode().getScope().getName()
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
This test illustrates that you need to be very careful when adding additional taint-steps or dataflow steps using `TypeTracker`.
2+
3+
The basic setup is that we're modeling the behavior of a (fictitious) external library class `MyClass`, and a (fictitious) source of such an instance (the `source` function).
4+
5+
```py3
6+
class MyClass:
7+
def __init__(self, value):
8+
self.value = value
9+
10+
def get_value(self):
11+
return self.value
12+
```
13+
14+
We want to extend our analysis so that `obj.get_value()` is also tainted if `obj` is a tainted instance of `MyClass`.
15+
16+
The actual type-tracking is done in `SharedCode.qll`, but it's the _way_ we use it that matters.
17+
18+
In `NaiveModel.ql` we add an additional taint step from an instance of `MyClass` to calls of the bound method `get_value` (that we have tracked). It provides us with the correct results, but the path explanations are not very useful, since we are now able to cross functions in _one step_.
19+
20+
In `ProperModel.ql` we split the additional taint step in two:
21+
22+
1. from tracked `obj` that is an instance of `MyClass`, to `obj.get_value` **but only** exactly where the attribute is accessed (by an `AttrNode`). This is important, since if we allowed `<any tracked qualifier>.get_value` we would again be able to cross functions in one step.
23+
2. from tracked `get_value` bound method to calls of it, **but only** exactly where the call is (by a `CallNode`), for the same reason as above.
24+
25+
**Try running the queries in VS Code to see the difference**
26+
27+
### Possible improvements
28+
29+
Using `AttrNode` directly in the code here means there is no easy way to add `getattr` support to all such predicates. Not really sure how to handle this in a generalized way though :|
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
private import python
2+
private import experimental.dataflow.DataFlow
3+
private import experimental.dataflow.TaintTracking
4+
5+
// Helpers modeling MyClass
6+
/** A data-flow Node representing an instance of MyClass. */
7+
abstract class MyClass extends DataFlow::Node { }
8+
9+
private DataFlow::Node myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
10+
t.startInAttr("get_value") and
11+
result = qualifier
12+
or
13+
exists(DataFlow::TypeTracker t2 | result = myClassGetValue(qualifier, t2).track(t2, t))
14+
}
15+
16+
DataFlow::Node myClassGetValue(MyClass qualifier) {
17+
result = myClassGetValue(qualifier, DataFlow::TypeTracker::end())
18+
}
19+
20+
// Config
21+
class SourceCall extends DataFlow::Node, MyClass {
22+
SourceCall() { this.asCfgNode().(CallNode).getFunction().(NameNode).getId() = "source" }
23+
}
24+
25+
class SharedConfig extends TaintTracking::Configuration {
26+
SharedConfig() { this = "SharedConfig" }
27+
28+
override predicate isSource(DataFlow::Node source) { source instanceof SourceCall }
29+
30+
override predicate isSink(DataFlow::Node sink) {
31+
exists(CallNode call |
32+
call.getFunction().(NameNode).getId() = "sink" and
33+
call.getArg(0) = sink.asCfgNode()
34+
)
35+
}
36+
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
class MyClass:
2+
def __init__(self, value):
3+
self.value = value
4+
5+
def get_value(self):
6+
return self.value
7+
8+
9+
def source():
10+
return MyClass("tainted")
11+
12+
13+
def sink(obj):
14+
print("sink", obj)
15+
16+
17+
################################################################################
18+
19+
20+
def test_simple():
21+
src = source()
22+
sink(src.get_value())
23+
24+
25+
################################################################################
26+
27+
28+
def test_alias():
29+
src = source()
30+
foo = src
31+
bound_method = foo.get_value
32+
val = bound_method()
33+
sink(val)
34+
35+
36+
################################################################################
37+
38+
39+
def sink_func(arg):
40+
val = arg.get_value()
41+
sink(val)
42+
43+
44+
def test_accross_functions():
45+
src = source()
46+
sink_func(src)
47+
48+
49+
################################################################################
50+
51+
52+
def deeply_nested_sink(arg):
53+
val = arg.get_value()
54+
sink(val)
55+
56+
57+
def deeply_nested_passthrough_1(arg):
58+
deeply_nested_sink(arg)
59+
60+
61+
def deeply_nested_passthrough_2(arg):
62+
deeply_nested_passthrough_1(arg)
63+
64+
65+
def deeply_nested_passthrough_3(arg):
66+
deeply_nested_passthrough_2(arg)
67+
68+
69+
def test_deeply_nested():
70+
src = source()
71+
deeply_nested_passthrough_3(src)
72+
73+
74+
################################################################################
75+
76+
77+
def recv_bound_method(bm):
78+
val = bm()
79+
sink(val)
80+
81+
82+
def test_pass_bound_method():
83+
src = source()
84+
recv_bound_method(src.get_value)
85+
86+
87+
################################################################################
88+
89+
def deeply_nested_bound_method_sink(bm):
90+
val = bm()
91+
sink(val)
92+
93+
94+
def deeply_nested_bound_method_passthrough_1(bm):
95+
deeply_nested_bound_method_sink(bm)
96+
97+
98+
def deeply_nested_bound_method_passthrough_2(bm):
99+
deeply_nested_bound_method_passthrough_1(bm)
100+
101+
102+
def deeply_nested_bound_method_passthrough_3(bm):
103+
deeply_nested_bound_method_passthrough_2(bm)
104+
105+
106+
def test_deeply_nested_bound_method():
107+
src = source()
108+
deeply_nested_bound_method_passthrough_3(src.get_value)

0 commit comments

Comments
 (0)