@@ -170,82 +170,6 @@ class InputSymbol extends TInputSymbol {
170170 }
171171}
172172
173- /**
174- * Gets a lower bound on the characters matched by the character class term `t`, which must be a direct child of a `RegExpCharacterClass`: a constant's own value, the first bound reported by `isRange` for a character range, or a fixed string for the class escapes whose value is `w`, `W`, `s` or `S` (there is no result for any other escape).
175- */
176- string getCCLowerBound ( RegExpTerm t ) {
177- t .getParent ( ) instanceof RegExpCharacterClass and
178- (
179- result = t .( RegExpConstant ) .getValue ( )
180- or
181- t .( RegExpCharacterRange ) .isRange ( result , _)
182- or
183- exists ( string name | name = t .( RegExpCharacterClassEscape ) .getValue ( ) |
184- name = "w" and result = "0"
185- or
186- name = "W" and result = ""
187- or
188- name = "s" and result = ""
189- or
190- name = "S" and result = ""
191- )
192- )
193- }
194-
195- /**
196- * Gets the greatest value, by string ordering, of any `RegExpConstant` (the aggregate is not restricted to a particular regular expression). Used to represent intervals without an upper bound.
197- */
198- string highestCharacter ( ) { result = max ( RegExpConstant c | | c .getValue ( ) ) }
199-
200- /**
201- * Gets an upper bound on the characters matched by the character class term `t`, which must be a direct child of a `RegExpCharacterClass`: a constant's own value, the second bound reported by `isRange` for a character range, `"z"` for the class escape `w`, or `highestCharacter()` for the class escapes `W`, `s` and `S` (there is no result for any other escape).
202- */
203- string getCCUpperBound ( RegExpTerm t ) {
204- t .getParent ( ) instanceof RegExpCharacterClass and
205- (
206- result = t .( RegExpConstant ) .getValue ( )
207- or
208- t .( RegExpCharacterRange ) .isRange ( _, result )
209- or
210- exists ( string name | name = t .( RegExpCharacterClassEscape ) .getValue ( ) |
211- name = "w" and result = "z"
212- or
213- name = "W" and result = highestCharacter ( )
214- or
215- name = "s" and result = highestCharacter ( )
216- or
217- name = "S" and result = highestCharacter ( )
218- )
219- )
220- }
221-
222- /**
223- * Holds if `s` is the input symbol `CharClass(cc)` of a character class `cc` whose root is `l`, where `lo` is the minimum `getCCLowerBound` and `hi` the maximum `getCCUpperBound` over the children of `cc`, so that the matched characters are meant to be contained
224- * in the interval `lo-hi`. NOTE(review): children for which the bound predicates have no result are ignored by the aggregates - confirm the interval is still an over-approximation for such classes.
225- */
226- predicate hasBounds ( RegExpRoot l , InputSymbol s , string lo , string hi ) {
227- exists ( RegExpCharacterClass cc | s = CharClass ( cc ) |
228- l = getRoot ( cc ) and
229- lo = min ( getCCLowerBound ( cc .getAChild ( ) ) ) and
230- hi = max ( getCCUpperBound ( cc .getAChild ( ) ) )
231- )
232- }
233-
234- /**
235- * Holds if `s1` and `s2` possibly have a non-empty intersection: either both have bounds (see `hasBounds`) under the same root `l` and the larger of the two lower bounds does not exceed the smaller of the two upper bounds, or `intersect(s1, s2)` has a result.
236- *
237- * This predicate is over-approximate; it is only used for pruning the search space.
238- */
239- predicate compatible ( InputSymbol s1 , InputSymbol s2 ) {
240- exists ( RegExpRoot l , string lo1 , string lo2 , string hi1 , string hi2 |
241- hasBounds ( l , s1 , lo1 , hi1 ) and
242- hasBounds ( l , s2 , lo2 , hi2 ) and
243- max ( string s | s = lo1 or s = lo2 ) <= min ( string s | s = hi1 or s = hi2 )
244- )
245- or
246- exists ( intersect ( s1 , s2 ) )
247- }
248-
249173newtype TState =
250174 Match ( RegExpTerm t , int i ) {
251175 getRoot ( t ) .isRelevant ( ) and
@@ -447,7 +371,7 @@ predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
447371 // Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
448372 // From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
449373 // and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
450- compatible ( s1 , s2 )
374+ exists ( intersect ( s1 , s2 ) )
451375 |
452376 s1 != s2
453377 or
@@ -473,7 +397,7 @@ predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2)
473397 exists ( State q1 , State q2 | q = MkStatePair ( q1 , q2 ) |
474398 deltaClosed ( q1 , s1 , r1 ) and
475399 deltaClosed ( q2 , s2 , r2 ) and
476- compatible ( s1 , s2 )
400+ exists ( intersect ( s1 , s2 ) )
477401 )
478402}
479403
0 commit comments