@@ -209,11 +209,20 @@ predicate step(
209209 */
210210pragma [ noinline]
211211string getAThreewayIntersect ( InputSymbol s1 , InputSymbol s2 , InputSymbol s3 ) {
212- result = intersect ( s1 , s2 ) and result = [ intersect ( s2 , s3 ) , intersect ( s1 , s3 ) ]
212+ result = minAndMaxIntersect ( s1 , s2 ) and result = [ intersect ( s2 , s3 ) , intersect ( s1 , s3 ) ]
213213 or
214- result = intersect ( s1 , s3 ) and result = [ intersect ( s2 , s3 ) , intersect ( s1 , s2 ) ]
214+ result = minAndMaxIntersect ( s1 , s3 ) and result = [ intersect ( s2 , s3 ) , intersect ( s1 , s2 ) ]
215215 or
216- result = intersect ( s2 , s3 ) and result = [ intersect ( s1 , s2 ) , intersect ( s1 , s3 ) ]
216+ result = minAndMaxIntersect ( s2 , s3 ) and result = [ intersect ( s1 , s2 ) , intersect ( s1 , s3 ) ]
217+ }
218+
219+ /**
220+ * Gets the minimum and the maximum character in the intersection of `a` and `b`.
221+ * This predicate is used to limit the size of `getAThreewayIntersect`.
222+ */
223+ pragma [ noinline]
224+ string minAndMaxIntersect ( InputSymbol a , InputSymbol b ) {
225+ result = [ min ( intersect ( a , b ) ) , max ( intersect ( a , b ) ) ]
217226}
218227
219228private newtype TTrace =
@@ -347,15 +356,11 @@ predicate isPumpable(State pivot, State succ, string pump) {
347356/**
348357 * Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
349358 */
350- predicate polynimalReDoS ( RegExpTerm t , string msg ) {
351- exists ( string pump , State s , string prefixMsg |
359+ predicate polynimalReDoS ( RegExpTerm t , string pump , string prefixMsg , RegExpTerm prev ) {
360+ exists ( State s , State pivot |
352361 hasReDoSResult ( t , pump , s , prefixMsg ) and
353- exists ( State pivot |
354- isPumpable ( pivot , s , _) and
355- msg =
356- "Strings " + prefixMsg + "with many repetitions of '" + pump +
357- "' can start matching anywhere after the start of the preceeding " + pivot .getRepr ( )
358- )
362+ isPumpable ( pivot , s , _) and
363+ prev = pivot .getRepr ( )
359364 )
360365}
361366
@@ -388,17 +393,30 @@ private predicate matchesEpsilon(RegExpTerm t) {
388393 forex ( RegExpTerm child | child = t .( RegExpSequence ) .getAChild ( ) | matchesEpsilon ( child ) )
389394}
390395
396+ /**
397+ * Gets a message explaining why `term` can cause polynomial backtracking.
398+ */
399+ string getReasonString ( RegExpTerm term , string pump , string prefixMsg , RegExpTerm prev ) {
400+ polynimalReDoS ( term , pump , prefixMsg , prev ) and
401+ result =
402+ "Strings " + prefixMsg + "with many repetitions of '" + pump +
403+ "' can start matching anywhere after the start of the preceeding " + prev
404+ }
405+
391406/**
392407 * A term that may cause a regular expression engine to perform a
393408 * polynomial number of match attempts, relative to the input length.
394409 */
395410class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
396411 string reason ;
412+ string pump ;
413+ string prefixMsg ;
414+ RegExpTerm prev ;
397415
398416 PolynomialBackTrackingTerm ( ) {
399- polynimalReDoS ( this , _ ) and
400- // there might be many reasons for this term to have polynomial backtracking - we pick an arbitary one.
401- reason = min ( string msg | polynimalReDoS ( this , msg ) )
417+ reason = getReasonString ( this , pump , prefixMsg , prev ) and
418+ // there might be many reasons for this term to have polynomial backtracking - we pick the shortest one (ties are broken by string order).
419+ reason = min ( string msg | msg = getReasonString ( this , _ , _ , _ ) | msg order by msg . length ( ) , msg )
402420 }
403421
404422 /**
@@ -410,6 +428,21 @@ class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
410428 )
411429 }
412430
431+ /**
432+ * Gets the string that should be repeated to cause this regular expression to perform a polynomial number of match attempts.
433+ */
434+ string getPumpString ( ) { result = pump }
435+
436+ /**
437+ * Gets a message describing the prefix that a string must start with for this term to cause polynomial backtracking.
438+ */
439+ string getPrefixMessage ( ) { result = prefixMsg }
440+
441+ /**
442+ * Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
443+ */
444+ RegExpTerm getPreviousLoop ( ) { result = prev }
445+
413446 /**
414447 * Gets the reason for the number of match attempts.
415448 */
0 commit comments