33import java .nio .file .Path ;
44import java .util .ArrayList ;
55import java .util .Collections ;
6+ import java .util .HashSet ;
7+ import java .util .Arrays ;
68import java .util .List ;
79import java .util .Set ;
810import java .util .Stack ;
164166import com .semmle .util .trap .TrapWriter ;
165167import com .semmle .util .trap .TrapWriter .Label ;
166168
169+ import com .semmle .util .files .FileLineOffsetCache ;
170+
171+
167172/** Extractor for AST-based information; invoked by the {@link JSExtractor}. */
168173public class ASTExtractor {
169174 private final TrapWriter trapwriter ;
@@ -567,12 +572,17 @@ public Label visit(Literal nd, Context c) {
567572 String valueString = nd .getStringValue ();
568573
569574 trapwriter .addTuple ("literals" , valueString , source , key );
575+ Position start = nd .getLoc ().getStart ();
576+ com .semmle .util .locations .Position startPos = new com .semmle .util .locations .Position (start .getLine (), start .getColumn (), start .getOffset ());
577+
570578 if (nd .isRegExp ()) {
571579 OffsetTranslation offsets = new OffsetTranslation ();
572580 offsets .set (0 , 1 ); // skip the initial '/'
573- regexpExtractor .extract (source .substring (1 , source .lastIndexOf ('/' )), offsets , nd , false );
581+ SourceMap sourceMap = SourceMap .legacyWithStartPos (SourceMap .fromString (nd .getRaw ()).offsetBy (0 , offsets ), startPos );
582+ regexpExtractor .extract (source .substring (1 , source .lastIndexOf ('/' )), sourceMap , nd , false );
574583 } else if (nd .isStringLiteral () && !c .isInsideType () && nd .getRaw ().length () < 1000 ) {
575- regexpExtractor .extract (valueString , makeStringLiteralOffsets (nd .getRaw ()), nd , true );
584+ SourceMap sourceMap = SourceMap .legacyWithStartPos (SourceMap .fromString (nd .getRaw ()).offsetBy (0 , makeStringLiteralOffsets (nd .getRaw ())), startPos );
585+ regexpExtractor .extract (valueString , sourceMap , nd , true );
576586
577587 // Scan the string for template tags, if we're in a context where such tags are relevant.
578588 if (scopeManager .isInTemplateFile ()) {
@@ -593,6 +603,48 @@ private boolean isOctalDigit(char ch) {
593603 return '0' <= ch && ch <= '7' ;
594604 }
595605
606+ private String getStringConcatResult (Expression exp ) {
607+ if (exp instanceof BinaryExpression ) {
608+ BinaryExpression be = (BinaryExpression ) exp ;
609+ if (be .getOperator ().equals ("+" )) {
610+ String left = getStringConcatResult (be .getLeft ());
611+ String right = getStringConcatResult (be .getRight ());
612+ if (left != null && right != null ) {
613+ return left + right ;
614+ }
615+ }
616+ } else if (exp instanceof Literal ) {
617+ Literal lit = (Literal ) exp ;
618+ if (!lit .isStringLiteral ()) {
619+ return null ;
620+ }
621+ return lit .getStringValue ();
622+ }
623+ return null ;
624+ }
625+
626+ private OffsetTranslation computeStringConcatOffset (Expression exp ) {
627+ if (exp instanceof Literal && ((Literal )exp ).isStringLiteral ()) {
628+ String raw = ((Literal ) exp ).getRaw ();
629+ return makeStringLiteralOffsets (raw );
630+ }
631+
632+ if (exp instanceof BinaryExpression ) {
633+ BinaryExpression be = (BinaryExpression ) exp ;
634+ OffsetTranslation left = computeStringConcatOffset (be .getLeft ());
635+ OffsetTranslation right = computeStringConcatOffset (be .getRight ());
636+
637+ if (left == null || right == null ) {
638+ return null ;
639+ }
640+ int delta = be .getRight ().getLoc ().getStart ().getOffset () - be .getLeft ().getLoc ().getStart ().getOffset ();
641+ int offset = getStringConcatResult (be .getLeft ()).length ();
642+ return left .append (right , offset , delta );
643+ }
644+
645+ return null ;
646+ }
647+
596648 /**
597649 * Builds a translation from offsets in a string value back to its original raw literal text
598650 * (including quotes).
@@ -786,11 +838,31 @@ public Label visit(AssignmentExpression nd, Context c) {
786838 return key ;
787839 }
788840
841+ // set to determine which BinaryExpression has been extracted as regexp
842+ private Set <Expression > extractedAsRegexp = new HashSet <>();
843+
789844 @ Override
790845 public Label visit (BinaryExpression nd , Context c ) {
791846 Label key = super .visit (nd , c );
847+ extractedAsRegexp .add (nd .getLeft ());
848+ extractedAsRegexp .add (nd .getRight ());
792849 visit (nd .getLeft (), key , 0 );
793850 visit (nd .getRight (), key , 1 );
851+ if (extractedAsRegexp .contains (nd )) {
852+ return key ;
853+ }
854+ String rawString = getStringConcatResult (nd );
855+ if (rawString == null ) {
856+ return key ;
857+ }
858+ if (rawString .length () > 1000 && !rawString .trim ().isEmpty ()) {
859+ return key ;
860+ }
861+ OffsetTranslation offsets = computeStringConcatOffset (nd );
862+ Position start = nd .getLoc ().getStart ();
863+ com .semmle .util .locations .Position startPos = new com .semmle .util .locations .Position (start .getLine (), start .getColumn (), start .getOffset ());
864+ SourceMap sourceMap = SourceMap .legacyWithStartPos (SourceMap .fromString (nd .getLoc ().getSource ()).offsetBy (0 , offsets ), startPos );
865+ regexpExtractor .extract (rawString , sourceMap , nd , true );
794866 return key ;
795867 }
796868
0 commit comments