Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 71eacea

Browse files
committed
add the cached stages pattern to Python
1 parent 48015e5 commit 71eacea

13 files changed

Lines changed: 181 additions & 3 deletions

File tree

python/ql/lib/semmle/python/Flow.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import python
22
private import semmle.python.pointsto.PointsTo
3+
private import semmle.python.internal.CachedStages
34

45
/*
56
* Note about matching parent and child nodes and CFG splitting:
@@ -122,7 +123,9 @@ class ControlFlowNode extends @py_flow_node {
122123
AstNode getNode() { py_flow_bb_node(this, result, _, _) }
123124

124125
/** Gets a textual representation of this element. */
126+
cached
125127
string toString() {
128+
Stages::DataFlow::ref() and
126129
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
127130
or
128131
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -1011,6 +1014,7 @@ class BasicBlock extends @py_flow_node {
10111014

10121015
cached
10131016
BasicBlock getImmediateDominator() {
1017+
Stages::SSA::ref() and
10141018
this.firstNode().getImmediateDominator().getBasicBlock() = result
10151019
}
10161020

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,19 @@ private DataFlowCallable getCallableScope(Scope s) {
110110
result = getCallableScope(s.getEnclosingScope())
111111
}
112112

113+
private import semmle.python.internal.CachedStages
114+
113115
/**
114116
* An element, viewed as a node in a data flow graph. Either an SSA variable
115117
* (`EssaNode`) or a control flow node (`CfgNode`).
116118
*/
117119
class Node extends TNode {
118120
/** Gets a textual representation of this element. */
119-
string toString() { result = "Data flow node" }
121+
cached
122+
string toString() {
123+
Stages::DataFlow::ref() and
124+
result = "Data flow node"
125+
}
120126

121127
/** Gets the scope of this node. */
122128
Scope getScope() { none() }
@@ -134,9 +140,11 @@ class Node extends TNode {
134140
* For more information, see
135141
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
136142
*/
143+
cached
137144
predicate hasLocationInfo(
138145
string filepath, int startline, int startcolumn, int endline, int endcolumn
139146
) {
147+
Stages::DataFlow::ref() and
140148
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
141149
}
142150

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import python
1010
import DataFlowPublic
1111
private import DataFlowPrivate
12+
private import semmle.python.internal.CachedStages
1213

1314
/**
1415
* A data flow node that is a source of local flow. This includes things like
@@ -33,6 +34,7 @@ private import DataFlowPrivate
3334
class LocalSourceNode extends Node {
3435
cached
3536
LocalSourceNode() {
37+
Stages::DataFlow::ref() and
3638
this instanceof ExprNode and
3739
not simpleLocalFlowStep(_, this)
3840
or
@@ -176,6 +178,7 @@ private module Cached {
176178
*/
177179
cached
178180
predicate hasLocalSource(Node sink, LocalSourceNode source) {
181+
Stages::DataFlow::ref() and
179182
source = sink
180183
or
181184
exists(Node second |

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ private import semmle.python.dataflow.new.DataFlow
33
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
44
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
55
private import semmle.python.ApiGraphs
6+
private import semmle.python.internal.CachedStages
67

78
/**
89
* Holds if `node` should be a sanitizer in all global taint flow configurations
@@ -30,6 +31,7 @@ private module Cached {
3031
*/
3132
cached
3233
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
34+
Stages::Taint::ref() and
3335
localAdditionalTaintStep(nodeFrom, nodeTo)
3436
or
3537
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
@@ -42,6 +44,7 @@ private module Cached {
4244
*/
4345
cached
4446
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
47+
Stages::Taint::ref() and
4548
concatStep(nodeFrom, nodeTo)
4649
or
4750
subscriptStep(nodeFrom, nodeTo)

python/ql/lib/semmle/python/essa/Essa.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import python
66
private import SsaCompute
77
import semmle.python.essa.Definitions
8+
private import semmle.python.internal.CachedStages
89

910
/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */
1011
class EssaVariable extends TEssaDefinition {
@@ -270,6 +271,7 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
270271
/** Gets the input variable for this phi node on the edge `pred` -> `this.getBasicBlock()`, if any. */
271272
cached
272273
EssaVariable getInput(BasicBlock pred) {
274+
Stages::SSA::ref() and
273275
result.getDefinition() = this.reachingDefinition(pred)
274276
or
275277
result.getDefinition() = this.inputEdgeRefinement(pred)

python/ql/lib/semmle/python/essa/SsaCompute.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
*/
9191

9292
import python
93+
private import semmle.python.internal.CachedStages
9394

9495
cached
9596
private module SsaComputeImpl {
@@ -308,6 +309,7 @@ private module SsaComputeImpl {
308309
*/
309310
cached
310311
predicate reachesEndOfBlock(SsaSourceVariable v, BasicBlock defbb, int defindex, BasicBlock b) {
312+
Stages::SSA::ref() and
311313
Liveness::liveAtExit(v, b) and
312314
(
313315
defbb = b and

python/ql/lib/semmle/python/essa/SsaDefinitions.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55

66
import python
77
private import semmle.python.pointsto.Base
8+
private import semmle.python.internal.CachedStages
89

910
cached
1011
module SsaSource {
1112
/** Holds if `v` is used as the receiver in a method call. */
1213
cached
1314
predicate method_call_refinement(Variable v, ControlFlowNode use, CallNode call) {
15+
Stages::SSA::ref() and
1416
use = v.getAUse() and
1517
call.getFunction().(AttrNode).getObject() = use and
1618
not test_contains(_, call)

python/ql/lib/semmle/python/internal/Awaited.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
private import python
88
private import semmle.python.dataflow.new.DataFlow
9+
private import semmle.python.internal.CachedStages
910

1011
/**
1112
* INTERNAL: Do not use.
@@ -14,6 +15,7 @@ private import semmle.python.dataflow.new.DataFlow
1415
*/
1516
cached
1617
DataFlow::Node awaited(DataFlow::Node awaitedValue) {
18+
Stages::DataFlow::ref() and
1719
// `await` x
1820
// - `awaitedValue` is `x`
1921
// - `result` is `await x`
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/**
2+
* INTERNAL: Do not use.
3+
*
4+
* The purpose of this file is to control which cached predicates belong to the same stage.
5+
*
6+
* Combining stages can improve performance as we are more likely to reuse shared, non-cached predicates.
7+
*
8+
* To make a predicate `p` belong to a stage `A`:
9+
* - make `p` depend on `A::ref()`, and
10+
* - make `A::backref()` depend on `p`.
11+
*
12+
* Since `A` is a cached module, `ref` and `backref` must be in the same stage, and the dependency
13+
* chain above thus forces `p` to be in that stage as well.
14+
*
15+
* With these two predicates in a `cached module` we ensure that all the cached predicates will be in a single stage at runtime.
16+
*
17+
* Grouping stages can cause unnecessary computation, as a concrete query might not depend on
18+
* all the cached predicates in a stage.
19+
* Care should therefore be taken not to combine two stages, if it is likely that a query only depends
20+
* on some but not all the cached predicates in the combined stage.
21+
*/
22+
23+
import python
24+
25+
/**
26+
* Contains a `cached module` for each stage.
27+
* Each `cached module` ensures that predicates that are supposed to be in the same stage, are in the same stage.
28+
*
29+
* Each `cached module` contains two predicates:
30+
* The first, `ref`, always holds, and is referenced from `cached` predicates.
31+
* The second, `backref`, contains references to the same `cached` predicates.
32+
* The `backref` predicate starts with `1 = 1 or` to ensure that the predicate will be optimized down to a constant by the optimizer.
33+
*/
34+
module Stages {
35+
/**
36+
* The `SSA` stage.
37+
*/
38+
cached
39+
module SSA {
40+
/**
41+
* Always holds.
42+
* Ensures that a predicate is evaluated as part of the SSA stage.
43+
*/
44+
cached
45+
predicate ref() { 1 = 1 }
46+
47+
private import semmle.python.essa.SsaDefinitions as SsaDefinitions
48+
private import semmle.python.essa.SsaCompute as SsaCompute
49+
private import semmle.python.essa.Essa as Essa
50+
51+
/**
52+
* DO NOT USE!
53+
* Contains references to each predicate that uses the above `ref` predicate.
54+
*/
55+
cached
56+
predicate backref() {
57+
1 = 1
58+
or
59+
SsaDefinitions::SsaSource::method_call_refinement(_, _, _)
60+
or
61+
SsaCompute::SsaDefinitions::reachesEndOfBlock(_, _, _, _)
62+
or
63+
exists(any(Essa::PhiFunction p).getInput(_))
64+
}
65+
}
66+
67+
/**
68+
* The `DataFlow` stage.
69+
*/
70+
cached
71+
module DataFlow {
72+
/**
73+
* Always holds.
74+
* Ensures that a predicate is evaluated as part of the DataFlow stage.
75+
*/
76+
cached
77+
predicate ref() { 1 = 1 }
78+
79+
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
80+
private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
81+
private import semmle.python.internal.Awaited as Awaited
82+
private import semmle.python.pointsto.Base as PointsToBase
83+
private import semmle.python.types.Object as TypeObject
84+
private import semmle.python.objects.TObject as TObject
85+
private import semmle.python.Flow as Flow
86+
87+
/**
88+
* DO NOT USE!
89+
* Contains references to each predicate that uses the above `ref` predicate.
90+
*/
91+
cached
92+
predicate backref() {
93+
1 = 1
94+
or
95+
exists(any(DataFlowPublic::Node node).toString())
96+
or
97+
any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
98+
or
99+
any(LocalSources::LocalSourceNode n).flowsTo(_)
100+
or
101+
exists(Awaited::awaited(_))
102+
or
103+
PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
104+
or
105+
exists(TypeObject::Object a)
106+
or
107+
exists(TObject::TObject f)
108+
or
109+
exists(any(Flow::ControlFlowNode c).toString())
110+
}
111+
}
112+
113+
/**
114+
* The `taint` stage.
115+
*/
116+
cached
117+
module Taint {
118+
/**
119+
* Always holds.
120+
* Ensures that a predicate is evaluated as part of the Taint stage.
121+
*/
122+
cached
123+
predicate ref() { 1 = 1 }
124+
125+
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
126+
127+
/**
128+
* DO NOT USE!
129+
* Contains references to each predicate that uses the above `ref` predicate.
130+
*/
131+
cached
132+
predicate backref() {
133+
1 = 1
134+
or
135+
TaintTrackingPrivate::localAdditionalTaintStep(_, _)
136+
or
137+
TaintTrackingPrivate::defaultAdditionalTaintStep(_, _)
138+
}
139+
}
140+
}

python/ql/lib/semmle/python/objects/TObject.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ private import semmle.python.types.Builtins
55
private import semmle.python.objects.ObjectInternal
66
private import semmle.python.pointsto.PointsTo
77
private import semmle.python.pointsto.PointsToContext
8+
private import semmle.python.internal.CachedStages
89

910
/**
1011
* Internal type backing `ObjectInternal` and `Value`

0 commit comments

Comments
 (0)