Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a374540

Browse files
committed
JS: Range analysis library
1 parent d991fa8 commit a374540

6 files changed

Lines changed: 482 additions & 0 deletions

File tree

javascript/ql/src/javascript.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import semmle.javascript.NPM
3636
import semmle.javascript.Paths
3737
import semmle.javascript.Promises
3838
import semmle.javascript.CanonicalNames
39+
import semmle.javascript.RangeAnalysis
3940
import semmle.javascript.Regexp
4041
import semmle.javascript.SSA
4142
import semmle.javascript.StandardLibrary
Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
import javascript
2+
3+
/*
4+
* The range analysis is based on Difference Bound constraints, that is, inequalities of form:
5+
*
6+
* a - b <= c
7+
*
8+
* or equivalently,
9+
*
10+
* a <= b + c
11+
*
12+
* where a and b are variables in the constraint system, and c is an integer constant.
13+
*
14+
* Such constraints obey a transitive law. Given two constraints,
15+
*
16+
* a - x <= c1
17+
* x - b <= c2
18+
*
19+
* adding the two inequalities yields the obvious transitive conclusion:
20+
*
21+
* a - b <= c1 + c2
22+
*
23+
* We view the system of constraints as a weighted graph, where `a - b <= c`
24+
* corresponds to the edge `a -> b` with weight `c`.
25+
*
26+
* Paths in this graph correspond to the additional inequalities we can derive from the constraint set.
27+
* A negative-weight cycle represents a contradiction, such as `a <= a - 1`.
28+
*
29+
*
30+
* CONTROL FLOW:
31+
*
32+
* Each constraint is associated with a CFG node where that constraint is known to be valid.
33+
* The constraint is only valid within the dominator subtree of that node.
34+
*
35+
* The transitive rule additionally requires that, in order to compose two edges, one of
36+
* their CFG nodes must dominate the other, and the resulting edge becomes associated with the
37+
* dominated CFG node (i.e. the most restrictive scope). This ensures constraints
38+
* cannot be taken out of context.
39+
*
40+
* If a negative-weight cycle can be constructed from the edges "in scope" at a given CFG node
41+
* (i.e. associated with a dominator of the node), that node is unreachable.
42+
*
43+
*
44+
* DUAL CONSTRAINTS:
45+
*
46+
* For every data flow node `a` we have two constraint variables, `+a` and `-a` (or just `a` and `-a`)
47+
* representing the numerical value of `a` and its negation. Negations let us reason about the sum of
48+
* two variables. For example:
49+
*
50+
* a + b <= 10 becomes a - (-b) <= 10
51+
*
52+
* It also lets us reason about the upper and lower bounds of a single variable:
53+
*
54+
* a <= 10 becomes a + a <= 20 becomes a - (-a) <= 20
55+
* a >= 10 becomes -a <= -10 becomes (-a) - a <= -20
56+
*
57+
* For the graph analogy to include the relationship between `a` and `-a`, all constraints
58+
* imply their dual constraint:
59+
*
60+
* a - b <= c implies (-b) - (-a) <= c
61+
*
62+
* That is, for every edge from a -> b, there is an edge with the same weight from (-b) -> (-a).
63+
*
64+
*
65+
* PATH FINDING:
66+
*
67+
* See `extendedEdge` predicate for details about how we find negative-weight paths in the graph.
68+
*
69+
*
70+
* CAVEATS:
71+
*
72+
* - We assume !(x <= y) means x > y, ignoring NaN.
73+
*
74+
* - We assume x < y means x <= y - 1, ignoring floats.
75+
*
76+
* - We assume integer arithmetic is exact, ignoring values above 2^53.
77+
*
78+
*/
79+
80+
/**
81+
* Contains predicates for reasoning about the relative numeric value of expressions.
82+
*/
83+
module RangeAnalysis {
84+
/**
85+
* Holds if `node` has exactly one data flow predecessor, that is,
* `node.getAPredecessor()` has exactly one distinct result.
*
* Nodes with no predecessor or with several predecessors do not satisfy this predicate.
86+
*/
87+
pragma[noinline]
88+
private predicate hasUniquePredecessor(DataFlow::Node node) {
89+
strictcount(node.getAPredecessor()) = 1
90+
}
91+
92+
/**
93+
* Gets the definition of `node`, without unfolding phi nodes.
*
* Follows data flow predecessors backwards from `node` for as long as the current node
* has a unique predecessor, and returns the first node reached that does not.
* A node with zero or several predecessors is therefore returned unchanged.
94+
*/
95+
DataFlow::Node getDefinition(DataFlow::Node node) {
96+
if hasUniquePredecessor(node) then
97+
result = getDefinition(node.getAPredecessor())
98+
else
99+
result = node
100+
}
101+
102+
/**
103+
* Holds if `r` can be modelled as `r = root * sign + bias`.
104+
*
105+
* Does not follow data flow edges and is not recursive (that is, `root` may itself be defined linearly).
*
* Only a single syntactic step is recognised, where `c` is an operand with a known integer value:
*
*   root + c    (sign =  1, bias =  c)
*   c + root    (sign =  1, bias =  c)
*   root - c    (sign =  1, bias = -c)
*   c - root    (sign = -1, bias =  c)
*   -root       (sign = -1, bias =  0)
*
* Expressions that themselves have a known integer value are excluded.
*
* NOTE(review): operand types are not checked here, and `+` is also string concatenation
* in JavaScript - confirm that callers only rely on this for numeric values.
106+
*/
107+
private predicate linearDefinitionStep(DataFlow::Node r, DataFlow::Node root, int sign, int bias) {
108+
not exists(r.asExpr().getIntValue()) and
109+
(
// r = root + c
110+
exists (AddExpr expr | r.asExpr() = expr |
111+
root = expr.getLeftOperand().flow() and
112+
bias = expr.getRightOperand().getIntValue() and
113+
sign = 1)
114+
or
// r = c + root
115+
exists (AddExpr expr | r.asExpr() = expr |
116+
bias = expr.getLeftOperand().getIntValue() and
117+
root = expr.getRightOperand().flow() and
118+
sign = 1)
119+
or
// r = root - c, i.e. root + (-c)
120+
exists (SubExpr expr | r.asExpr() = expr |
121+
root = expr.getLeftOperand().flow() and
122+
bias = -expr.getRightOperand().getIntValue() and
123+
sign = 1)
124+
or
// r = c - root, i.e. root * (-1) + c
125+
exists (SubExpr expr | r.asExpr() = expr |
126+
bias = expr.getLeftOperand().getIntValue() and
127+
root = expr.getRightOperand().flow() and
128+
sign = -1)
129+
or
// r = -root
130+
exists (NegExpr expr | r.asExpr() = expr |
131+
root = expr.getOperand().flow() and
132+
bias = 0 and
133+
sign = -1)
134+
)
135+
}
136+
137+
/**
138+
* Holds if `r` can be modelled as `r = root * sign + bias`.
*
* Exactly one of three cases applies to each `r`, checked in this order:
*
* 1. `r` has a unique data flow predecessor: the result is that of the predecessor.
* 2. `r` matches `linearDefinitionStep`: the step is composed with the linear
*    definition of its operand (see the comment in the body).
* 3. Otherwise `r` is its own root: `r = r * 1 + 0`.
139+
*/
140+
predicate linearDefinition(DataFlow::Node r, DataFlow::Node root, int sign, int bias) {
141+
if hasUniquePredecessor(r) then
142+
linearDefinition(r.getAPredecessor(), root, sign, bias)
143+
else if linearDefinitionStep(r, _, _, _) then
// Given r = pred * sign1 + bias1 and pred = root * sign2 + bias2, substitution gives
// r = root * (sign1 * sign2) + (bias1 + sign1 * bias2).
144+
exists (DataFlow::Node pred, int sign1, int bias1, int sign2, int bias2 |
145+
linearDefinitionStep(r, pred, sign1, bias1) and
146+
linearDefinition(pred, root, sign2, bias2) and
147+
sign = sign1 * sign2 and
148+
bias = bias1 + sign1 * bias2)
149+
else (
150+
root = r and
151+
sign = 1 and
152+
bias = 0
153+
)
154+
}
155+
156+
/**
157+
* Holds if `r` can be modelled as `r = xroot * xsign + yroot * ysign + bias`.
*
* Nodes with a unique data flow predecessor delegate to that predecessor.
* Expressions with a known integer value are never modelled as sums.
* Otherwise `r` must be an addition or subtraction whose two operands each have a
* `linearDefinition`, or a negation of an expression that is itself such a sum.
158+
*/
159+
predicate linearDefinitionSum(DataFlow::Node r, DataFlow::Node xroot, int xsign, DataFlow::Node yroot, int ysign, int bias) {
160+
if hasUniquePredecessor(r) then
161+
linearDefinitionSum(r.getAPredecessor(), xroot, xsign, yroot, ysign, bias)
162+
else if exists(r.asExpr().getIntValue()) then
163+
none() // do not model constants as sums
164+
else (
// r = left + right: the operand biases simply add up.
165+
exists (AddExpr add, int bias1, int bias2 | r.asExpr() = add |
166+
linearDefinition(add.getLeftOperand().flow(), xroot, xsign, bias1) and
167+
linearDefinition(add.getRightOperand().flow(), yroot, ysign, bias2) and
168+
bias = bias1 + bias2)
169+
or
// r = left - right: the right operand is `yroot * (-ysign) + (-bias2)`, so that
// `-right` contributes `yroot * ysign + bias2` to the sum.
170+
exists (SubExpr sub, int bias1, int bias2 | r.asExpr() = sub |
171+
linearDefinition(sub.getLeftOperand().flow(), xroot, xsign, bias1) and
172+
linearDefinition(sub.getRightOperand().flow(), yroot, -ysign, -bias2) and // Negate right-hand operand
173+
bias = bias1 + bias2)
174+
or
// r = -(sum): negate both signs and the bias of the operand's sum.
175+
linearDefinitionSum(r.asExpr().(NegExpr).getOperand().flow(), xroot, -xsign, yroot, -ysign, -bias)
176+
)
177+
}
178+
179+
/**
180+
* Holds if the given comparison can be modelled as `A <op> B + bias` where `<op>` is the comparison operator.
*
* Here `A` stands for `a * asign` and `B` stands for `b * bsign`.
* The operator itself is not inspected by this predicate; three operand shapes are recognised:
*
* - both operands have a `linearDefinition`;
* - the left operand is a `linearDefinitionSum` and the right operand is an integer constant;
* - the left operand is an integer constant and the right operand is a `linearDefinitionSum`.
181+
*/
182+
predicate linearComparison(Comparison comparison, DataFlow::Node a, int asign, DataFlow::Node b, int bsign, int bias) {
183+
exists(Expr left, Expr right, int bias1, int bias2 | left = comparison.getLeftOperand() and right = comparison.getRightOperand() |
184+
// A <= B + c
// left = A + bias1 and right = B + bias2, so `left <op> right` is `A <op> B + (bias2 - bias1)`
185+
linearDefinition(left.flow(), a, asign, bias1) and
186+
linearDefinition(right.flow(), b, bsign, bias2) and
187+
bias = bias2 - bias1
188+
or
189+
// A - B + c1 <= c2 becomes A <= B + (c2 - c1)
190+
linearDefinitionSum(left.flow(), a, asign, b, -bsign, bias1) and
191+
right.getIntValue() = bias2 and
192+
bias = bias2 - bias1
193+
or
194+
// c1 <= -A + B + c2 becomes A <= B + (c2 - c1)
195+
left.getIntValue() = bias1 and
196+
linearDefinitionSum(right.flow(), a, -asign, b, bsign, bias2) and
197+
bias = bias2 - bias1
198+
)
199+
}
200+
201+
/**
202+
* Holds if `guard` asserts that the outcome of `A <op> B + bias` is true, where `<op>` is a comparison operator.
*
* `A` stands for `a * asign`, `B` stands for `b * bsign`, and `operator` is the text of `<op>`.
* The comparison is found by taking `getDefinition` of the guard's test expression.
* If the guard's outcome is `false`, `operator` is the negation of the comparison's
* operator (see `negateOperator`), so the reported relation always holds at `guard`.
203+
*/
204+
predicate linearComparisonGuard(ConditionGuardNode guard, DataFlow::Node a, int asign, string operator, DataFlow::Node b, int bsign, int bias) {
205+
exists (Comparison compare | compare = getDefinition(guard.getTest().flow()).asExpr() |
206+
linearComparison(compare, a, asign, b, bsign, bias) and
207+
(
208+
guard.getOutcome() = true and operator = compare.getOperator()
209+
or
210+
guard.getOutcome() = false and operator = negateOperator(compare.getOperator())
211+
)
212+
)
213+
}
214+
215+
/**
216+
* Gets the binary operator whose return value is the opposite of `operator` (excluding NaN comparisons).
*
* Covers the equality operators `==`, `!=`, `===`, `!==` and the relational operators
* `<`, `<=`, `>`, `>=`. There is no result for any other operator string.
217+
*/
218+
private string negateOperator(string operator) {
219+
operator = "==" and result = "!=" or
220+
operator = "!=" and result = "==" or
221+
operator = "===" and result = "!==" or
222+
operator = "!==" and result = "===" or
223+
operator = "<" and result = ">=" or
224+
operator = "<=" and result = ">" or
225+
operator = ">" and result = "<=" or
226+
operator = ">=" and result = "<"
227+
}
228+
229+
/**
230+
* Holds if immediately after `cfg` it becomes known that `A <= B + c`.
*
* `A` stands for `a * asign`, `B` stands for `b * bsign`, and `c` is the `bias` parameter.
* Every guard operator handled here (`<=`, `<`, `>=`, `>`, `==`, `===`) is normalised to
* this single `<=` form; an equality contributes an edge in both directions.
231+
*
232+
* These are the initial inputs to the difference bound constraint system.
233+
*
234+
* The dual constraint `-B <= -A + c` is not included in this predicate.
235+
*/
236+
predicate comparisonEdge(ControlFlowNode cfg, DataFlow::Node a, int asign, DataFlow::Node b, int bsign, int bias) {
237+
// A <= B + c
238+
linearComparisonGuard(cfg, a, asign, "<=", b, bsign, bias)
239+
or
240+
// A <= B + c iff A < B + c + 1 (assuming A,B are integers)
241+
linearComparisonGuard(cfg, a, asign, "<", b, bsign, bias + 1)
242+
or
243+
// A <= B + c iff B >= A - c
244+
linearComparisonGuard(cfg, b, bsign, ">=", a, asign, -bias)
245+
or
246+
// A <= B + c iff B > A - c - 1 (assuming A,B are integers)
247+
linearComparisonGuard(cfg, b, bsign, ">", a, asign, -bias - 1)
248+
or
249+
exists (string operator | operator = "==" or operator = "===" |
250+
// A == B + c iff A <= B + c and B <= A - c
// The first disjunct matches a guard written as `A == B + c`, the second one written as `B == A - c`.
251+
linearComparisonGuard(cfg, a, asign, operator, b, bsign, bias)
252+
or
253+
linearComparisonGuard(cfg, b, bsign, operator, a, asign, -bias)
254+
)
255+
}
256+
257+
/**
258+
* The set of initial edges including those from dual constraints.
*
* Holds if `a * asign <= b * bsign + c` is known at `cfg`, either directly from
* `comparisonEdge` or as the dual of a `comparisonEdge` (both sides negated and swapped,
* same weight `c`).
259+
*/
260+
private predicate seedEdge(ControlFlowNode cfg, DataFlow::Node a, int asign, DataFlow::Node b, int bsign, int c) {
261+
// A <= B + c
262+
comparisonEdge(cfg, a, asign, b, bsign, c)
263+
or
264+
// -B <= -A + c (dual constraint)
265+
comparisonEdge(cfg, b, -bsign, a, -asign, c)
266+
}
267+
268+
/**
269+
* Applies a restricted transitive rule to the edge set.
270+
*
271+
* In particular, we apply the transitive rule only where a negative edge is followed by a non-negative edge.
272+
* For example:
273+
*
274+
* A --(-1)--> B --(+3)--> C
275+
*
276+
* yields:
277+
*
278+
* A --(+2)--> C
279+
*
280+
* In practice, the restriction to edges of different sign prevents the
281+
* quadratic blow-up you would normally get from a transitive closure.
282+
*
283+
* It also prevents the relation from becoming infinite in case
284+
* there are negative-weight cycles, where the transitive weights would
285+
* otherwise diverge towards minus infinity.
286+
*
287+
* Moreover, the rule is enough to guarantee the following property:
288+
*
289+
* A negative-weight path from X to Y exists iff a path of negative-weight edges exists from X to Y.
290+
*
291+
* This means negative-weight cycles (contradictions) can be detected using simple cycle detection.
*
* Parameters: the edge goes from the signed variable (`a`, `asign`) to (`b`, `bsign`) with
* weight `c`, and `cfg` is the CFG node the edge is associated with. For a composed edge,
* `cfg` is whichever of the two input edges' CFG nodes is dominated by the other.
292+
*/
293+
private predicate extendedEdge(ControlFlowNode cfg, DataFlow::Node a, int asign, DataFlow::Node b, int bsign, int c) {
294+
seedEdge(cfg, a, asign, b, bsign, c)
295+
or
296+
exists (DataFlow::Node mid, int midx, ControlFlowNode cfg1, int c1, ControlFlowNode cfg2, int c2 |
297+
extendedEdge(cfg1, a, asign, mid, midx, c1) and
298+
extendedEdge(cfg2, mid, midx, b, bsign, c2) and
// Restricted transitivity: first edge strictly negative, second edge non-negative.
299+
c1 < 0 and
300+
c2 >= 0 and
301+
c = c1 + c2 and
302+
// One of the two CFG nodes must dominate the other, and `cfg` must be bound to the dominated one.
303+
(
304+
// They are in the same basic block
// The node with the larger index in the block is chosen; if i = j then cfg1 = cfg2.
305+
exists (BasicBlock bb, int i, int j |
306+
bb.getNode(i) = cfg1 and
307+
bb.getNode(j) = cfg2 and
308+
if i < j then
309+
cfg = cfg2
310+
else
311+
cfg = cfg1)
312+
or
313+
// They are in different basic blocks
314+
cfg1.getBasicBlock().(ReachableBasicBlock).strictlyDominates(cfg2.getBasicBlock()) and
315+
cfg = cfg2
316+
or
317+
cfg2.getBasicBlock().(ReachableBasicBlock).strictlyDominates(cfg1.getBasicBlock()) and
318+
cfg = cfg1
319+
)
320+
)
321+
}
322+
323+
/**
324+
* Holds if there is a negative-weight edge from (`a`, `asign`) to (`b`, `bsign`) associated with `cfg`.
*
* This is the projection of `extendedEdge` onto edges whose weight is strictly below zero;
* the weight itself is not exposed.
325+
*/
326+
private predicate negativeEdge(ControlFlowNode cfg, DataFlow::Node a, int asign, DataFlow::Node b, int bsign) {
327+
exists (int weight | extendedEdge(cfg, a, asign, b, bsign, weight) |
328+
weight < 0)
329+
}
330+
331+
/**
332+
* Holds if (`a`, `asign`) can reach (`b`, `bsign`) using only negative-weight edges.
333+
*
334+
* The initial outgoing edge from (`a`, `asign`) must be derived at `cfg`.
*
* Every subsequent edge on the path must be associated with a CFG node `midcfg` that is
* in scope at `cfg`: either `midcfg` is in the same basic block as `cfg` at the same or an
* earlier index, or the basic block of `midcfg` strictly dominates the basic block of `cfg`.
*
* NOTE(review): the predicate is `pragma[noopt]`, which is presumably why the basic blocks
* are bound to explicit intermediate variables in the last disjunct - keep that form when editing.
335+
*/
336+
pragma[noopt]
337+
private predicate reachableByNegativeEdges(ControlFlowNode cfg, DataFlow::Node a, int asign, DataFlow::Node b, int bsign) {
// Base case: a single negative edge derived at `cfg`.
338+
negativeEdge(cfg, a, asign, b, bsign)
339+
or
// Extend an existing path by one negative edge whose CFG node is in the same basic block as `cfg`.
340+
exists(DataFlow::Node mid, int midx, ControlFlowNode midcfg |
341+
reachableByNegativeEdges(cfg, a, asign, mid, midx) and
342+
negativeEdge(midcfg, mid, midx, b, bsign) and
343+
exists (BasicBlock bb, int i, int j |
344+
bb.getNode(i) = midcfg and
345+
bb.getNode(j) = cfg and
346+
i <= j))
347+
or
348+
// Same as above, but where CFG nodes are in different basic blocks
349+
exists(DataFlow::Node mid, int midx, ControlFlowNode midcfg, BasicBlock midBB, ReachableBasicBlock midRBB, BasicBlock cfgBB |
350+
reachableByNegativeEdges(cfg, a, asign, mid, midx) and
351+
negativeEdge(midcfg, mid, midx, b, bsign) and
352+
midBB = midcfg.getBasicBlock() and
353+
midRBB = midBB.(ReachableBasicBlock) and
354+
cfgBB = cfg.getBasicBlock() and
355+
midRBB.strictlyDominates(cfgBB)
356+
)
357+
}
358+
359+
/**
360+
* Holds if the condition asserted at `guard` is contradictory, that is, its condition always has the
361+
* opposite of the expected outcome.
*
* This is detected as a cycle of negative-weight edges: some signed variable (`a`, `asign`)
* reaches itself via `reachableByNegativeEdges`, with the first edge of the cycle derived at `guard`.
362+
*/
363+
predicate isContradictoryGuardNode(ConditionGuardNode guard) {
364+
exists (DataFlow::Node a, int asign | reachableByNegativeEdges(guard, a, asign, a, asign))
365+
}
366+
}

javascript/ql/test/library-tests/RangeAnalysis/DeadBranch.expected

Whitespace-only changes.

0 commit comments

Comments
 (0)