@@ -155,15 +155,22 @@ private class RegexpCharacterConstant extends RegExpConstant {
155155 RegexpCharacterConstant ( ) { this .isCharacter ( ) }
156156}
157157
158+ /**
159+ * A regexp term that is relevant for this ReDoS analysis, i.e. a term whose root (`getRoot(this)`) satisfies `isRelevant()`.
160+ */
161+ class RelevantRegExpTerm extends RegExpTerm {
162+ RelevantRegExpTerm ( ) { getRoot ( this ) .isRelevant ( ) }
163+ }
164+
158165/**
159166 * Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
160167 *
161168 * Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
162169 * The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
163170 */
164- private predicate isCanonicalTerm ( RegExpTerm term , string str ) {
171+ private predicate isCanonicalTerm ( RelevantRegExpTerm term , string str ) {
165172 term =
166- rank [ 1 ] ( RegExpTerm t , Location loc , File file |
173+ min ( RelevantRegExpTerm t , Location loc , File file |
167174 loc = t .getLocation ( ) and
168175 file = t .getFile ( ) and
169176 str = t .getRawValue ( )
@@ -178,15 +185,15 @@ private predicate isCanonicalTerm(RegExpTerm term, string str) {
178185private newtype TInputSymbol =
179186 /** An input symbol corresponding to character `c`. */
180187 Char ( string c ) {
181- c = any ( RegexpCharacterConstant cc | getRoot ( cc ) . isRelevant ( ) ) .getValue ( ) .charAt ( _)
188+ c = any ( RegexpCharacterConstant cc | cc instanceof RelevantRegExpTerm ) .getValue ( ) .charAt ( _)
182189 } or
183190 /**
184191 * An input symbol representing all characters matched by
185192 * a (non-universal) character class that has string representation `charClassString`.
186193 */
187194 CharClass ( string charClassString ) {
188- exists ( RegExpTerm term | term .getRawValue ( ) = charClassString | getRoot ( term ) . isRelevant ( ) ) and
189- exists ( RegExpTerm recc | isCanonicalTerm ( recc , charClassString ) |
195+ exists ( RelevantRegExpTerm term | term .getRawValue ( ) = charClassString ) and
196+ exists ( RelevantRegExpTerm recc | isCanonicalTerm ( recc , charClassString ) |
190197 recc instanceof RegExpCharacterClass and
191198 not recc .( RegExpCharacterClass ) .isUniversalClass ( )
192199 or
@@ -626,13 +633,10 @@ RegExpRoot getRoot(RegExpTerm term) {
626633}
627634
/**
 * The algebraic datatype of states.
 *
 * - `Match(t, i)`: a term `t` whose root is relevant, paired with an index `i` that is either 0
 *   or a valid character index into the value of `t` when `t` is a `RegexpCharacterConstant`.
 * - `Accept(l)` and `AcceptAnySuffix(l)`: one state each per relevant root `l`.
 *
 * NOTE(review): presumably these are the states of the automaton built for the ReDoS analysis;
 * confirm against the rest of the file, which is not visible in this hunk.
 */
628635private newtype TState =
629- Match ( RegExpTerm t , int i ) {
630- getRoot ( t ) .isRelevant ( ) and
631- (
632- i = 0
633- or
634- exists ( t .( RegexpCharacterConstant ) .getValue ( ) .charAt ( i ) )
635- )
636+ Match ( RelevantRegExpTerm t , int i ) { // relevance of `t` is enforced by the parameter type
637+ i = 0 // index 0 is allowed for every relevant term
638+ or
639+ exists ( t .( RegexpCharacterConstant ) .getValue ( ) .charAt ( i ) ) // one index per character of a constant's value
636640 } or
637641 Accept ( RegExpRoot l ) { l .isRelevant ( ) } or
638642 AcceptAnySuffix ( RegExpRoot l ) { l .isRelevant ( ) }
0 commit comments