Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 57a9cad

Browse files
committed
JS: Fix offsets of octal and unicode escape
1 parent 4680e3a commit 57a9cad

4 files changed

Lines changed: 86 additions & 3 deletions

File tree

javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,10 @@ public Label visit(Literal nd, Context c) {
526526
return key;
527527
}
528528

529+
/** Returns true if {@code ch} is an octal digit, i.e. one of the characters '0' through '7'. */
private boolean isOctalDigit(char ch) {
530+
return '0' <= ch && ch <= '7';
531+
}
532+
529533
/**
530534
* Builds a translation from offsets in a string value back to its original raw literal text
531535
* (including quotes).
@@ -551,8 +555,14 @@ public OffsetTranslation makeStringLiteralOffsets(String rawLiteral) {
551555
int outputLength = 1; // Number characters the sequence expands to.
552556
char ch = rawLiteral.charAt(pos + 1);
553557
if ('0' <= ch && ch <= '7') {
554-
// Octal escape: \NNN
555-
length = 4;
558+
// Octal escape: \N, \NN, or \NNN
559+
int firstDigit = pos + 1;
560+
int end = firstDigit;
561+
int maxEnd = Math.min(firstDigit + (ch <= '3' ? 3 : 2), rawLiteral.length());
562+
while (end < maxEnd && isOctalDigit(rawLiteral.charAt(end))) {
563+
++end;
564+
}
565+
length = end - pos;
556566
} else if (ch == 'x') {
557567
// Hex escape: \xNN
558568
length = 4;
@@ -562,11 +572,16 @@ public OffsetTranslation makeStringLiteralOffsets(String rawLiteral) {
562572
// Scan for the ending '}'
563573
int firstDigit = pos + 3;
564574
int end = firstDigit;
575+
int leadingZeros = 0;
576+
while (end < rawLiteral.length() && rawLiteral.charAt(end) == '0') {
577+
++end;
578+
++leadingZeros;
579+
}
565580
while (end < rawLiteral.length() && rawLiteral.charAt(end) != '}') {
566581
++end;
567582
}
568583
int numDigits = end - firstDigit;
569-
if (numDigits > 4) {
584+
if (numDigits - leadingZeros > 4) {
570585
outputLength = 2; // Encoded as a surrogate pair
571586
}
572587
++end; // Include '}' character
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
| tst.js:1:22:1:22 | . | . |
2+
| tst.js:2:23:2:23 | . | . |
3+
| tst.js:3:24:3:24 | . | . |
4+
| tst.js:4:23:4:23 | . | . |
5+
| tst.js:5:24:5:24 | . | . |
6+
| tst.js:6:24:6:24 | . | . |
7+
| tst.js:7:24:7:24 | . | . |
8+
| tst.js:8:24:8:24 | . | . |
9+
| tst.js:10:24:10:24 | . | . |
10+
| tst.js:11:24:11:24 | . | . |
11+
| tst.js:12:25:12:25 | . | . |
12+
| tst.js:13:25:13:25 | . | . |
13+
| tst.js:14:25:14:25 | . | . |
14+
| tst.js:15:25:15:25 | . | . |
15+
| tst.js:17:25:17:25 | . | . |
16+
| tst.js:18:25:18:25 | . | . |
17+
| tst.js:19:22:19:22 | . | . |
18+
| tst.js:20:23:20:23 | . | . |
19+
| tst.js:21:24:21:24 | . | . |
20+
| tst.js:23:26:23:26 | . | . |
21+
| tst.js:24:27:24:27 | . | . |
22+
| tst.js:25:28:25:28 | . | . |
23+
| tst.js:26:29:26:29 | . | . |
24+
| tst.js:27:30:27:30 | . | . |
25+
| tst.js:28:31:28:31 | . | . |
26+
| tst.js:30:27:30:27 | . | . |
27+
| tst.js:31:28:31:28 | . | . |
28+
| tst.js:32:29:32:29 | . | . |
29+
| tst.js:33:30:33:30 | . | . |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Test query: for every RegExpDot that has a StringLiteral among its transitive parents
// (getParent*()), select the dot together with the character of the literal's raw value
// located at (dot start column - literal start column).
import javascript
2+
3+
from StringLiteral literal, RegExpDot dot, int pos
4+
where dot.getParent*() = literal
5+
and pos = dot.getLocation().getStartColumn() - literal.getLocation().getStartColumn()
6+
select dot, literal.getRawValue().charAt(pos)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
new RegExp('\0 hello . \0 world \0');
2+
new RegExp('\00 hello . \00 world \00');
3+
new RegExp('\000 hello . \000 world \000');
4+
new RegExp('\12 hello . \12 world \12');
5+
new RegExp('\333 hello . \333 world \333');
6+
new RegExp('\444 hello . \444 world \444');
7+
new RegExp('\555 hello . \555 world \555');
8+
new RegExp('\666 hello . \666 world \666');
9+
new RegExp('\777 hello . \777 world \777');
10+
new RegExp('\787 hello . \787 world \787');
11+
new RegExp('\087 hello . \087 world \087');
12+
new RegExp('\3331 hello . \3331 world \3331');
13+
new RegExp('\4441 hello . \4441 world \4441');
14+
new RegExp('\5551 hello . \5551 world \5551');
15+
new RegExp('\6661 hello . \6661 world \6661');
16+
new RegExp('\7771 hello . \7771 world \7771');
17+
new RegExp('\7871 hello . \7871 world \7871');
18+
new RegExp('\0871 hello . \0871 world \0871');
19+
new RegExp('\8 hello . \8 world \8');
20+
new RegExp('\81 hello . \81 world \81');
21+
new RegExp('\811 hello . \811 world \811');
22+
23+
new RegExp('\u{a0} hello . \u{a0} world \u{a0}');
24+
new RegExp('\u{0a0} hello . \u{0a0} world \u{0a0}');
25+
new RegExp('\u{00a0} hello . \u{00a0} world \u{00a0}');
26+
new RegExp('\u{000a0} hello . \u{000a0} world \u{000a0}');
27+
new RegExp('\u{0000a0} hello . \u{0000a0} world \u{0000a0}');
28+
new RegExp('\u{00000a0} hello . \u{00000a0} world \u{00000a0}');
29+
30+
new RegExp('\u{1a0} hello . \u{1a0} world \u{1a0}');
31+
new RegExp('\u{10a0} hello . \u{10a0} world \u{10a0}');
32+
new RegExp('\u{100a0} hello . \u{100a0} world \u{100a0}');
33+
new RegExp('\u{1000a0} hello . \u{1000a0} world \u{1000a0}');

0 commit comments

Comments
 (0)