Created
August 17, 2016 23:55
-
-
Save Deamon5550/8efe1988b6d2f3ac045bb4da732e34b7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayDeque; | |
import java.util.ArrayList; | |
import java.util.Deque; | |
import java.util.List; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
public class ExpressionParser { | |
private static final boolean DEBUG = false; | |
private static void debug(String msg) { | |
if (DEBUG) { | |
System.out.println("[DEBUG] " + msg); | |
} | |
} | |
private final List<ExpressionTokenType> tokenTypes = new ArrayList<>(); | |
public ExpressionParser() { | |
this.tokenTypes.add(ExpressionTokenType.STRING_TERMINAL); | |
this.tokenTypes.add(ExpressionTokenType.NUMBER_TERMINAL); | |
this.tokenTypes.add(ExpressionTokenType.VARIABLE_TERMINAL); | |
this.tokenTypes.add(ExpressionTokenType.LEFT_PAREN); | |
this.tokenTypes.add(ExpressionTokenType.RIGHT_PAREN); | |
this.tokenTypes.add(ExpressionTokenType.COMMA); | |
this.tokenTypes.add(ExpressionTokenType.PLUS); | |
this.tokenTypes.add(ExpressionTokenType.MINUS); | |
this.tokenTypes.add(ExpressionTokenType.MULTI); | |
this.tokenTypes.add(ExpressionTokenType.DIVIDE); | |
this.tokenTypes.add(ExpressionTokenType.MODULO); | |
} | |
public ExpressionToken[] parse(String exp) { | |
debug("Parsing '" + exp + "'"); | |
List<ExpressionToken> tokens = splitTokens(exp); | |
List<ExpressionToken> result = new ArrayList<>(tokens.size()); | |
Deque<ExpressionToken> stack = new ArrayDeque<>(); | |
// Shunting yard algorithm | |
for (int i = 0; i < tokens.size(); i++) { | |
ExpressionToken token = tokens.get(i); | |
if (token.getType() == ExpressionTokenType.STRING_TERMINAL || token.getType() == ExpressionTokenType.NUMBER_TERMINAL) { | |
if (i > 0 && tokens.get(i - 1).getType().getCatagory() == ExpressionTokenCatagory.TERMINAL) { | |
throw new IllegalStateException("Syntax error: Cannot have two terminals with no operators between them"); | |
} | |
debug("Outputting " + token.getType().getName() + " '" + token.getToken() + "'"); | |
result.add(token); | |
} else if (token.getType() == ExpressionTokenType.VARIABLE_TERMINAL) { | |
if (i < tokens.size() - 1 && tokens.get(i + 1).getType() == ExpressionTokenType.LEFT_PAREN) { | |
token.setType(ExpressionTokenType.FUNCTION_TERMINAL); | |
debug("Stacking " + token.getType().getName() + " '" + token.getToken() + "'"); | |
stack.push(token); | |
} else { | |
debug("Outputting " + token.getType().getName() + " '" + token.getToken() + "'"); | |
result.add(token); | |
} | |
} else if (token.getType() == ExpressionTokenType.LEFT_PAREN) { | |
debug("Stacking " + token.getType().getName() + " '" + token.getToken() + "'"); | |
stack.push(token); | |
} else if (token.getType() == ExpressionTokenType.RIGHT_PAREN) { | |
while (true) { | |
if (stack.isEmpty()) { | |
throw new IllegalStateException("Syntax error: found right paren without left paren in stack"); | |
} | |
ExpressionToken next = stack.pop(); | |
if (next.getType() == ExpressionTokenType.LEFT_PAREN) { | |
debug("Popping left paren"); | |
break; | |
} | |
debug("Outputting " + next.getType().getName() + " '" + next.getToken() + "'"); | |
result.add(next); | |
} | |
ExpressionToken next = stack.peek(); | |
if (next != null && next.getType() == ExpressionTokenType.FUNCTION_TERMINAL) { | |
debug("Outputting " + next.getType().getName() + " '" + next.getToken() + "'"); | |
result.add(stack.pop()); | |
} | |
} else if (token.getType() == ExpressionTokenType.COMMA) { | |
while (true) { | |
if (stack.isEmpty()) { | |
throw new IllegalStateException("Syntax error: found comma not surrounded by braces"); | |
} | |
ExpressionToken next = stack.peek(); | |
if (next.getType() == ExpressionTokenType.LEFT_PAREN) { | |
break; | |
} | |
debug("Outputting " + next.getType().getName() + " '" + next.getToken() + "'"); | |
result.add(stack.pop()); | |
} | |
} else if (token.getType().getCatagory() == ExpressionTokenCatagory.BINARY_OP) { | |
if (token.getType() == ExpressionTokenType.MINUS) { | |
if (i > 0) { | |
ExpressionTokenCatagory cat = tokens.get(i - 1).getType().getCatagory(); | |
if (cat == ExpressionTokenCatagory.BINARY_OP || cat == ExpressionTokenCatagory.UNARY_OP) { | |
debug("Converting minus to negative"); | |
token.setType(ExpressionTokenType.NEGATIVE); | |
} | |
} else { | |
debug("Converting minus to negative"); | |
token.setType(ExpressionTokenType.NEGATIVE); | |
} | |
} | |
while (!stack.isEmpty()) { | |
ExpressionToken head = stack.peek(); | |
if (head.getType().getCatagory() == ExpressionTokenCatagory.BINARY_OP | |
|| head.getType().getCatagory() == ExpressionTokenCatagory.UNARY_OP) { | |
if (token.getType().isLeftAssociative()) { | |
if (token.getType().getPrecedence() <= head.getType().getPrecedence()) { | |
debug("Outputting " + head.getType().getName() + " '" + head.getToken() + "'"); | |
result.add(stack.pop()); | |
} else { | |
break; | |
} | |
} else { | |
if (token.getType().getPrecedence() < head.getType().getPrecedence()) { | |
debug("Outputting " + head.getType().getName() + " '" + head.getToken() + "'"); | |
result.add(stack.pop()); | |
} else { | |
break; | |
} | |
} | |
} else { | |
break; | |
} | |
} | |
stack.push(token); | |
} | |
} | |
while (!stack.isEmpty()) { | |
ExpressionToken token = stack.pop(); | |
if (token.getType() == ExpressionTokenType.LEFT_PAREN || token.getType() == ExpressionTokenType.RIGHT_PAREN) { | |
continue; | |
} | |
debug("Outputting " + token.getType().getName() + " '" + token.getToken() + "'"); | |
result.add(token); | |
} | |
return result.toArray(new ExpressionToken[result.size()]); | |
} | |
private List<ExpressionToken> splitTokens(String exp) { | |
List<ExpressionToken> tokens = new ArrayList<>(); | |
for (int i = 0; i < exp.length(); i++) { | |
char next = exp.charAt(i); | |
if (next == ' ' || next == '\t' || next == '\n') { | |
continue; | |
} | |
for (ExpressionTokenType type : this.tokenTypes) { | |
int count = type.getMatcher().matchFrom(exp, i); | |
if (count > 0) { | |
String token = exp.substring(i, i + count); | |
i += count - 1; | |
debug("Matched " + type.getName() + " to token '" + token + "'"); | |
tokens.add(new ExpressionToken(type, token)); | |
break; | |
} | |
} | |
} | |
return tokens; | |
} | |
public static class ExpressionToken { | |
private ExpressionTokenType type; | |
private String token; | |
public ExpressionToken(ExpressionTokenType type, String token) { | |
this.type = type; | |
this.token = token; | |
} | |
public ExpressionTokenType getType() { | |
return this.type; | |
} | |
public void setType(ExpressionTokenType type) { | |
this.type = type; | |
} | |
public String getToken() { | |
return this.token; | |
} | |
@Override | |
public boolean equals(Object o) { | |
if (o == this) { | |
return true; | |
} | |
if (!(o instanceof ExpressionToken)) { | |
return false; | |
} | |
ExpressionToken t = (ExpressionToken) o; | |
return this.type.equals(t.type) && this.token.equals(t.token); | |
} | |
} | |
public static enum ExpressionTokenCatagory { | |
TERMINAL, | |
FUNCTION, | |
UNARY_OP, | |
BINARY_OP | |
} | |
@FunctionalInterface | |
public static interface ExpressionMatcher { | |
int matchFrom(String exp, int index); | |
public static class ExactMatcher implements ExpressionMatcher { | |
private final String token; | |
public ExactMatcher(String token) { | |
this.token = token; | |
} | |
@Override | |
public int matchFrom(String exp, int index) { | |
if (exp.length() < index + this.token.length()) { | |
return 0; | |
} | |
return this.token.equals(exp.substring(index, index + this.token.length())) ? this.token.length() : 0; | |
} | |
} | |
public static class RegexMatcher implements ExpressionMatcher { | |
private final Pattern pattern; | |
public RegexMatcher(String regex) { | |
this.pattern = Pattern.compile(regex); | |
} | |
@Override | |
public int matchFrom(String exp, int index) { | |
Matcher m = this.pattern.matcher(exp); | |
if (m.find(index) && m.start() == index) { | |
return m.end() - m.start(); | |
} | |
return 0; | |
} | |
} | |
} | |
public static class ExpressionTokenType { | |
// Literals | |
public static final ExpressionTokenType STRING_TERMINAL = new ExpressionTokenType("string", ExpressionTokenCatagory.TERMINAL, | |
new ExpressionMatcher.RegexMatcher("\"(?:[^\"\\\\]|\\\\.)*\"")); | |
public static final ExpressionTokenType NUMBER_TERMINAL = new ExpressionTokenType("number", ExpressionTokenCatagory.TERMINAL, | |
new ExpressionMatcher.RegexMatcher("[0-9]+")); | |
public static final ExpressionTokenType VARIABLE_TERMINAL = new ExpressionTokenType("variable", ExpressionTokenCatagory.TERMINAL, | |
new ExpressionMatcher.RegexMatcher("[a-zA-Z_][a-zA-Z0-9_]*")); | |
// Functions | |
public static final ExpressionTokenType FUNCTION_TERMINAL = new ExpressionTokenType("function", ExpressionTokenCatagory.FUNCTION, | |
new ExpressionMatcher.RegexMatcher("[a-zA-Z_][a-zA-Z0-9_]*")); | |
public static final ExpressionTokenType LEFT_PAREN = new ExpressionTokenType("left_paren", ExpressionTokenCatagory.FUNCTION, | |
new ExpressionMatcher.ExactMatcher("(")); | |
public static final ExpressionTokenType RIGHT_PAREN = new ExpressionTokenType("right_paren", ExpressionTokenCatagory.FUNCTION, | |
new ExpressionMatcher.ExactMatcher(")")); | |
public static final ExpressionTokenType COMMA = new ExpressionTokenType("comma", ExpressionTokenCatagory.FUNCTION, | |
new ExpressionMatcher.ExactMatcher(",")); | |
// Operators | |
public static final ExpressionTokenType PLUS = new ExpressionTokenType("plus", ExpressionTokenCatagory.BINARY_OP, 2, | |
new ExpressionMatcher.ExactMatcher("+")); | |
public static final ExpressionTokenType MINUS = new ExpressionTokenType("minus", ExpressionTokenCatagory.BINARY_OP, 2, | |
new ExpressionMatcher.ExactMatcher("-")); | |
public static final ExpressionTokenType MULTI = new ExpressionTokenType("multiply", ExpressionTokenCatagory.BINARY_OP, 4, | |
new ExpressionMatcher.ExactMatcher("*")); | |
public static final ExpressionTokenType DIVIDE = new ExpressionTokenType("divide", ExpressionTokenCatagory.BINARY_OP, 4, | |
new ExpressionMatcher.ExactMatcher("/")); | |
public static final ExpressionTokenType MODULO = new ExpressionTokenType("modulo", ExpressionTokenCatagory.BINARY_OP, 4, | |
new ExpressionMatcher.ExactMatcher("%")); | |
public static final ExpressionTokenType NEGATIVE = new ExpressionTokenType("negative", ExpressionTokenCatagory.UNARY_OP, 3, false, | |
new ExpressionMatcher.ExactMatcher("-")); | |
private String name; | |
private ExpressionTokenCatagory catagory; | |
private int precedence; | |
private boolean leftAssociative; | |
private ExpressionMatcher matcher; | |
public ExpressionTokenType(String n, ExpressionTokenCatagory c, ExpressionMatcher matcher) { | |
this(n, c, 0, true, matcher); | |
} | |
public ExpressionTokenType(String n, ExpressionTokenCatagory c, int p, ExpressionMatcher matcher) { | |
this(n, c, p, true, matcher); | |
} | |
public ExpressionTokenType(String n, ExpressionTokenCatagory c, int p, boolean a, ExpressionMatcher matcher) { | |
this.name = n; | |
this.catagory = c; | |
this.precedence = p; | |
this.leftAssociative = a; | |
this.matcher = matcher; | |
} | |
public String getName() { | |
return this.name; | |
} | |
public ExpressionTokenCatagory getCatagory() { | |
return this.catagory; | |
} | |
public int getPrecedence() { | |
return this.precedence; | |
} | |
public boolean isLeftAssociative() { | |
return this.leftAssociative; | |
} | |
public ExpressionMatcher getMatcher() { | |
return this.matcher; | |
} | |
@Override | |
public String toString() { | |
return "ExpressionTokenType(name=" + this.name + ",catagory=" + this.catagory.name() + ")"; | |
} | |
@Override | |
public boolean equals(Object o) { | |
if (o == this) { | |
return true; | |
} | |
if (!(o instanceof ExpressionTokenType)) { | |
return false; | |
} | |
ExpressionTokenType t = (ExpressionTokenType) o; | |
return this.name.equals(t.name) && this.catagory == t.catagory && this.precedence == t.precedence; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment