Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
936 changes: 290 additions & 646 deletions jflex/src/main/cup/LexParse.cup

Large diffs are not rendered by default.

19 changes: 17 additions & 2 deletions jflex/src/main/java/jflex/chars/Interval.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ public final class Interval {
public int end;

/**
* Constructs a new interval from {@code start} to {@code end}, including both start and end
* points.
* Constructs a new interval from {@code start} to {@code end}, including both end points.
*
* @param start first codepoint the interval contains
* @param end last codepoint the interval contains
Expand All @@ -35,6 +34,22 @@ public Interval(int start, int end) {
this.end = end;
}

/**
 * Creates a degenerate interval whose first and last code point are the same value, i.e. an
 * interval that holds exactly one character.
 *
 * @param content the only character contained in the new interval
 */
public Interval(int content) {
  start = content;
  end = content;
}

/**
 * Copy constructor: creates a new interval with the same end points as an existing one.
 *
 * @param other the interval whose {@code start} and {@code end} are copied
 */
public Interval(Interval other) {
  start = other.start;
  end = other.end;
}

/**
* Returns {@code true} iff {@code point} is contained in this interval.
*
Expand Down
33 changes: 6 additions & 27 deletions jflex/src/main/java/jflex/core/CharClasses.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class CharClasses {
private static final boolean DEBUG = false;

/** the largest character that can be used in char classes */
public static final int maxChar = 0x10FFFF;
static final int maxChar = 0x10FFFF;

/** the char classes */
private List<IntCharSet> classes;
Expand Down Expand Up @@ -121,6 +121,8 @@ public int getNumClasses() {
* @param caseless if true upper/lower/title case are considered equivalent
*/
public void makeClass(IntCharSet set, boolean caseless) {
set = set.copy(); // avoid destructively updating the original

if (caseless) set = set.getCaseless(unicodeProps);

if (DEBUG) {
Expand Down Expand Up @@ -275,7 +277,7 @@ public void makeClassNot(List<Interval> l, boolean caseless) {
* Returns an array that contains the character class codes of all characters in the specified set
* of input characters.
*/
private int[] getClassCodes(IntCharSet set, boolean negate) {
public int[] getClassCodes(IntCharSet set, boolean negate) {

if (DEBUG) {
Out.dump("getting class codes for " + set);
Expand All @@ -285,7 +287,7 @@ private int[] getClassCodes(IntCharSet set, boolean negate) {
int size = classes.size();

// [fixme: optimize]
int temp[] = new int[size];
int[] temp = new int[size];
int length = 0;

for (int i = 0; i < size; i++) {
Expand All @@ -303,35 +305,12 @@ private int[] getClassCodes(IntCharSet set, boolean negate) {
}
}

int result[] = new int[length];
int[] result = new int[length];
System.arraycopy(temp, 0, result, 0, length);

return result;
}

/**
* Returns an array that contains the character class codes of all characters in the specified set
* of input characters.
*
* @param intervalList a List of Intervals, the set of characters to get the class codes for
* @return an array with the class codes for intervalList
*/
public int[] getClassCodes(List<Interval> intervalList) {
return getClassCodes(new IntCharSet(intervalList), false);
}

/**
* Returns an array that contains the character class codes of all characters that are
* <strong>not</strong> in the specified set of input characters.
*
* @param intervalList a List of Intervals, the complement of the set of characters to get the
* class codes for
* @return an array with the class codes for the complement of intervalList
*/
public int[] getNotClassCodes(List<Interval> intervalList) {
return getClassCodes(new IntCharSet(intervalList), true);
}

/**
* Check consistency of the stored classes [debug].
*
Expand Down
47 changes: 45 additions & 2 deletions jflex/src/main/java/jflex/core/IntCharSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ public IntCharSet() {}

/** Creates a char set that contains only the given character. */
public IntCharSet(int c) {
this(new Interval(c, c));
this(new Interval(c));
}

/**
 * Creates a char set that contains only the given interval.
 *
 * <p>Convenience form that wraps {@code start} and {@code end} in a new {@link Interval} and
 * delegates to the single-interval constructor.
 *
 * @param start first code point the set contains
 * @param end last code point the set contains
 */
public IntCharSet(int start, int end) {
this(new Interval(start, end));
}

/** Creates a charset that contains only one interval. */
Expand All @@ -60,6 +65,28 @@ public IntCharSet(List<Interval> chars) {
for (Interval interval : chars) add(interval);
}

/**
 * Builds the set of all characters.
 *
 * @return a newly created IntCharSet consisting of the single interval from 0 to {@code
 *     CharClasses.maxChar}.
 */
static IntCharSet allChars() {
  Interval everything = new Interval(0, CharClasses.maxChar);
  return new IntCharSet(everything);
}

/**
 * Builds the set of new-line characters.
 *
 * <p>The result is assembled from three intervals: line feed through carriage return, the single
 * character U+0085, and U+2028 through U+2029.
 *
 * @return a new IntCharSet that contains all characters that are considered a new-line char in
 *     Java.
 */
public static IntCharSet nlChars() {
  IntCharSet newLines = new IntCharSet(new Interval('\n', '\r'));
  newLines.add(new Interval('\u0085'));
  newLines.add(new Interval('\u2028', '\u2029'));
  return newLines;
}

/**
* returns the index of the interval that contains the character c, -1 if there is no such
* interval
Expand Down Expand Up @@ -194,7 +221,7 @@ public void add(int c) {
}

// end reached but nothing found -> append at end
intervals.add(new Interval(c, c));
intervals.add(new Interval(c));
}

/**
Expand All @@ -207,6 +234,21 @@ public boolean contains(int singleChar) {
return indexOf(singleChar) >= 0;
}

/**
 * Checks whether this set contains another set, i.e. whether {@code other} is a subset of this
 * set.
 *
 * @param other an IntCharSet; {@code null} is treated as the empty set.
 * @return true iff all characters of {@code other} are contained in this set.
 */
public boolean contains(IntCharSet other) {
  // a null argument stands for the empty set, which every set contains
  if (other == null) {
    return true;
  }

  // work on a copy so the caller's set is not modified by sub() below
  IntCharSet remainder = other.copy();
  IntCharSet common = this.and(other);
  // take the shared characters out of the copy; anything left over is missing from this set
  remainder.sub(common);
  return !remainder.containsElements();
}

/**
* {@inheritDoc}
*
Expand All @@ -222,6 +264,7 @@ public boolean equals(Object o) {
return Objects.equals(intervals, set.intervals);
}

/** {@inheritDoc} */
@Override
public int hashCode() {
int h = 1;
Expand Down
19 changes: 16 additions & 3 deletions jflex/src/main/java/jflex/core/Macros.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

package jflex.core;

import static java.util.stream.Collectors.toList;
import static jflex.l10n.ErrorMessages.MACRO_CYCLE;
import static jflex.l10n.ErrorMessages.get;

Expand Down Expand Up @@ -137,6 +138,7 @@ public void expand() throws jflex.exceptions.MacroException {
* @throws jflex.exceptions.MacroException when an error (such as a cyclic definition) occurs
* during expansion
*/
@SuppressWarnings("unchecked")
private RegExp expandMacro(String name, RegExp definition)
throws jflex.exceptions.MacroException {

Expand Down Expand Up @@ -180,15 +182,26 @@ private RegExp expandMacro(String name, RegExp definition)
case sym.STRING_I:
case sym.CHAR:
case sym.CHAR_I:
case sym.PRIMCLASS:
return definition;

case sym.CCLASS:
case sym.CCLASSNOT:
RegExp1 cclass = (RegExp1) definition;
List<RegExp> classes = (List<RegExp>) cclass.content;
cclass.content =
classes.stream().map(regexp -> expandMacro(name, regexp)).collect(toList());
return cclass;

case sym.CCLASSOP:
RegExp2 cclassOp = (RegExp2) ((RegExp1) definition).content;
cclassOp.r1 = expandMacro(name, cclassOp.r1);
cclassOp.r2 = expandMacro(name, cclassOp.r2);
return definition;

default:
throw new MacroException(
"unknown expression type "
+ definition.type
+ " in macro expansion"); // $NON-NLS-1$ //$NON-NLS-2$
"unknown expression type " + definition.typeName() + " in macro expansion");
}
}
}
52 changes: 10 additions & 42 deletions jflex/src/main/java/jflex/core/NFA.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ public final class NFA {
/** estimated size of the NFA (before actual construction) */
private int estSize;

Macros macros;
private CharClasses classes;

private LexScan scanner;
Expand Down Expand Up @@ -106,7 +105,6 @@ public NFA(int numInput, LexScan scanner, RegExps regExps, Macros macros, CharCl

this.scanner = scanner;
this.regExps = regExps;
this.macros = macros;
this.classes = classes;

numLexStates = scanner.states.number();
Expand Down Expand Up @@ -188,7 +186,7 @@ public void addRegExp(int regExpNum) {
// base forward pass
IntPair forward = insertNFA(r1);
// lookahead backward pass
IntPair backward = insertNFA(r2.rev(macros));
IntPair backward = insertNFA(r2.rev());

isFinal[forward.end()] = true;
action[forward.end()] = new Action(Action.FORWARD_ACTION);
Expand Down Expand Up @@ -222,9 +220,6 @@ private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
RegExp2 r = (RegExp2) lookAhead;
insertLookAheadChoices(baseEnd, a, r.r1);
insertLookAheadChoices(baseEnd, a, r.r2);
} else if (lookAhead.type == sym.MACROUSE) {
RegExp1 r = (RegExp1) lookAhead;
insertLookAheadChoices(baseEnd, a, macros.getDefinition((String) r.content));
} else {
int len = SemCheck.length(lookAhead);

Expand All @@ -243,11 +238,7 @@ private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
scanner.actions.add(x);
} else {
// should never happen
throw new Error(
"When inserting lookahead expression: unknown expression type "
+ lookAhead.type
+ " in "
+ lookAhead); // $NON-NLS-1$ //$NON-NLS-2$
throw new RegExpException(lookAhead);
}
}
}
Expand Down Expand Up @@ -671,22 +662,12 @@ private IntPair insertStringNFA(boolean caseless, String str) {
return IntPair.create(start, i + start);
}

private void insertClassNFA(List<Interval> intervals, int start, int end) {
// empty char class is ok:
if (intervals == null) return;

for (int aCl : classes.getClassCodes(intervals)) {
private void insertClassNFA(IntCharSet set, int start, int end) {
for (int aCl : classes.getClassCodes(set, false)) {
addTransition(start, aCl, end);
}
}

private void insertNotClassNFA(List<Interval> intervals, int start, int end) {

for (int input : classes.getNotClassCodes(intervals)) {
addTransition(start, input, end);
}
}

/**
* Constructs an NFA accepting the complement of the language of a given NFA.
*
Expand Down Expand Up @@ -909,9 +890,7 @@ private void removeDead(int start, int end) {
* <p>Assumes that regExp.isCharClass() == true
*
* @param regExp the regular expression to construct the NFA for
* @return a pair of integers denoting the index of start and end state of the NFA.
*/
@SuppressWarnings("unchecked") // for List<Interval> casts
private void insertCCLNFA(RegExp regExp, int start, int end) {
switch (regExp.type) {
case sym.BAR:
Expand All @@ -920,12 +899,8 @@ private void insertCCLNFA(RegExp regExp, int start, int end) {
insertCCLNFA(r.r2, start, end);
return;

case sym.CCLASS:
insertClassNFA((List<Interval>) ((RegExp1) regExp).content, start, end);
return;

case sym.CCLASSNOT:
insertNotClassNFA((List<Interval>) ((RegExp1) regExp).content, start, end);
case sym.PRIMCLASS:
insertClassNFA((IntCharSet) ((RegExp1) regExp).content, start, end);
return;

case sym.CHAR:
Expand All @@ -935,13 +910,9 @@ private void insertCCLNFA(RegExp regExp, int start, int end) {
case sym.CHAR_I:
insertLetterNFA(true, (Integer) ((RegExp1) regExp).content, start, end);
return;

case sym.MACROUSE:
insertCCLNFA(macros.getDefinition((String) ((RegExp1) regExp).content), start, end);
return;
}

throw new Error("Unknown expression type " + regExp.type + " in NFA construction");
throw new RegExpException(regExp);
}

/**
Expand All @@ -963,7 +934,7 @@ public IntPair insertNFA(RegExp regExp) {
Out.debug("Inserting RegExp : " + regExp);
}

if (regExp.isCharClass(macros)) {
if (regExp.isCharClass()) {
start = numStates;
end = numStates + 1;

Expand Down Expand Up @@ -1040,19 +1011,16 @@ public IntPair insertNFA(RegExp regExp) {
return complement(insertNFA((RegExp) ((RegExp1) regExp).content));

case sym.TILDE:
return insertNFA(regExp.resolveTilde(macros));
return insertNFA(regExp.resolveTilde());

case sym.STRING:
return insertStringNFA(false, (String) ((RegExp1) regExp).content);

case sym.STRING_I:
return insertStringNFA(true, (String) ((RegExp1) regExp).content);

case sym.MACROUSE:
return insertNFA(macros.getDefinition((String) ((RegExp1) regExp).content));
}

throw new Error("Unknown expression type " + regExp.type + " in NFA construction");
throw new RegExpException(regExp);
}

public int numStates() {
Expand Down
Loading