Skip to content

Instantly share code, notes, and snippets.

@Deamon5550
Created August 17, 2016 23:55
Show Gist options
  • Save Deamon5550/8efe1988b6d2f3ac045bb4da732e34b7 to your computer and use it in GitHub Desktop.
Save Deamon5550/8efe1988b6d2f3ac045bb4da732e34b7 to your computer and use it in GitHub Desktop.
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ExpressionParser {
private static final boolean DEBUG = false;
private static void debug(String msg) {
if (DEBUG) {
System.out.println("[DEBUG] " + msg);
}
}
private final List<ExpressionTokenType> tokenTypes = new ArrayList<>();
public ExpressionParser() {
this.tokenTypes.add(ExpressionTokenType.STRING_TERMINAL);
this.tokenTypes.add(ExpressionTokenType.NUMBER_TERMINAL);
this.tokenTypes.add(ExpressionTokenType.VARIABLE_TERMINAL);
this.tokenTypes.add(ExpressionTokenType.LEFT_PAREN);
this.tokenTypes.add(ExpressionTokenType.RIGHT_PAREN);
this.tokenTypes.add(ExpressionTokenType.COMMA);
this.tokenTypes.add(ExpressionTokenType.PLUS);
this.tokenTypes.add(ExpressionTokenType.MINUS);
this.tokenTypes.add(ExpressionTokenType.MULTI);
this.tokenTypes.add(ExpressionTokenType.DIVIDE);
this.tokenTypes.add(ExpressionTokenType.MODULO);
}
public ExpressionToken[] parse(String exp) {
debug("Parsing '" + exp + "'");
List<ExpressionToken> tokens = splitTokens(exp);
List<ExpressionToken> result = new ArrayList<>(tokens.size());
Deque<ExpressionToken> stack = new ArrayDeque<>();
// Shunting yard algorithm
for (int i = 0; i < tokens.size(); i++) {
ExpressionToken token = tokens.get(i);
if (token.getType() == ExpressionTokenType.STRING_TERMINAL || token.getType() == ExpressionTokenType.NUMBER_TERMINAL) {
if (i > 0 && tokens.get(i - 1).getType().getCatagory() == ExpressionTokenCatagory.TERMINAL) {
throw new IllegalStateException("Syntax error: Cannot have two terminals with no operators between them");
}
debug("Outputting " + token.getType().getName() + " '" + token.getToken() + "'");
result.add(token);
} else if (token.getType() == ExpressionTokenType.VARIABLE_TERMINAL) {
if (i < tokens.size() - 1 && tokens.get(i + 1).getType() == ExpressionTokenType.LEFT_PAREN) {
token.setType(ExpressionTokenType.FUNCTION_TERMINAL);
debug("Stacking " + token.getType().getName() + " '" + token.getToken() + "'");
stack.push(token);
} else {
debug("Outputting " + token.getType().getName() + " '" + token.getToken() + "'");
result.add(token);
}
} else if (token.getType() == ExpressionTokenType.LEFT_PAREN) {
debug("Stacking " + token.getType().getName() + " '" + token.getToken() + "'");
stack.push(token);
} else if (token.getType() == ExpressionTokenType.RIGHT_PAREN) {
while (true) {
if (stack.isEmpty()) {
throw new IllegalStateException("Syntax error: found right paren without left paren in stack");
}
ExpressionToken next = stack.pop();
if (next.getType() == ExpressionTokenType.LEFT_PAREN) {
debug("Popping left paren");
break;
}
debug("Outputting " + next.getType().getName() + " '" + next.getToken() + "'");
result.add(next);
}
ExpressionToken next = stack.peek();
if (next != null && next.getType() == ExpressionTokenType.FUNCTION_TERMINAL) {
debug("Outputting " + next.getType().getName() + " '" + next.getToken() + "'");
result.add(stack.pop());
}
} else if (token.getType() == ExpressionTokenType.COMMA) {
while (true) {
if (stack.isEmpty()) {
throw new IllegalStateException("Syntax error: found comma not surrounded by braces");
}
ExpressionToken next = stack.peek();
if (next.getType() == ExpressionTokenType.LEFT_PAREN) {
break;
}
debug("Outputting " + next.getType().getName() + " '" + next.getToken() + "'");
result.add(stack.pop());
}
} else if (token.getType().getCatagory() == ExpressionTokenCatagory.BINARY_OP) {
if (token.getType() == ExpressionTokenType.MINUS) {
if (i > 0) {
ExpressionTokenCatagory cat = tokens.get(i - 1).getType().getCatagory();
if (cat == ExpressionTokenCatagory.BINARY_OP || cat == ExpressionTokenCatagory.UNARY_OP) {
debug("Converting minus to negative");
token.setType(ExpressionTokenType.NEGATIVE);
}
} else {
debug("Converting minus to negative");
token.setType(ExpressionTokenType.NEGATIVE);
}
}
while (!stack.isEmpty()) {
ExpressionToken head = stack.peek();
if (head.getType().getCatagory() == ExpressionTokenCatagory.BINARY_OP
|| head.getType().getCatagory() == ExpressionTokenCatagory.UNARY_OP) {
if (token.getType().isLeftAssociative()) {
if (token.getType().getPrecedence() <= head.getType().getPrecedence()) {
debug("Outputting " + head.getType().getName() + " '" + head.getToken() + "'");
result.add(stack.pop());
} else {
break;
}
} else {
if (token.getType().getPrecedence() < head.getType().getPrecedence()) {
debug("Outputting " + head.getType().getName() + " '" + head.getToken() + "'");
result.add(stack.pop());
} else {
break;
}
}
} else {
break;
}
}
stack.push(token);
}
}
while (!stack.isEmpty()) {
ExpressionToken token = stack.pop();
if (token.getType() == ExpressionTokenType.LEFT_PAREN || token.getType() == ExpressionTokenType.RIGHT_PAREN) {
continue;
}
debug("Outputting " + token.getType().getName() + " '" + token.getToken() + "'");
result.add(token);
}
return result.toArray(new ExpressionToken[result.size()]);
}
private List<ExpressionToken> splitTokens(String exp) {
List<ExpressionToken> tokens = new ArrayList<>();
for (int i = 0; i < exp.length(); i++) {
char next = exp.charAt(i);
if (next == ' ' || next == '\t' || next == '\n') {
continue;
}
for (ExpressionTokenType type : this.tokenTypes) {
int count = type.getMatcher().matchFrom(exp, i);
if (count > 0) {
String token = exp.substring(i, i + count);
i += count - 1;
debug("Matched " + type.getName() + " to token '" + token + "'");
tokens.add(new ExpressionToken(type, token));
break;
}
}
}
return tokens;
}
public static class ExpressionToken {
private ExpressionTokenType type;
private String token;
public ExpressionToken(ExpressionTokenType type, String token) {
this.type = type;
this.token = token;
}
public ExpressionTokenType getType() {
return this.type;
}
public void setType(ExpressionTokenType type) {
this.type = type;
}
public String getToken() {
return this.token;
}
@Override
public boolean equals(Object o) {
if (o == this) {
return true;
}
if (!(o instanceof ExpressionToken)) {
return false;
}
ExpressionToken t = (ExpressionToken) o;
return this.type.equals(t.type) && this.token.equals(t.token);
}
}
public static enum ExpressionTokenCatagory {
TERMINAL,
FUNCTION,
UNARY_OP,
BINARY_OP
}
@FunctionalInterface
public static interface ExpressionMatcher {
int matchFrom(String exp, int index);
public static class ExactMatcher implements ExpressionMatcher {
private final String token;
public ExactMatcher(String token) {
this.token = token;
}
@Override
public int matchFrom(String exp, int index) {
if (exp.length() < index + this.token.length()) {
return 0;
}
return this.token.equals(exp.substring(index, index + this.token.length())) ? this.token.length() : 0;
}
}
public static class RegexMatcher implements ExpressionMatcher {
private final Pattern pattern;
public RegexMatcher(String regex) {
this.pattern = Pattern.compile(regex);
}
@Override
public int matchFrom(String exp, int index) {
Matcher m = this.pattern.matcher(exp);
if (m.find(index) && m.start() == index) {
return m.end() - m.start();
}
return 0;
}
}
}
public static class ExpressionTokenType {
// Literals
public static final ExpressionTokenType STRING_TERMINAL = new ExpressionTokenType("string", ExpressionTokenCatagory.TERMINAL,
new ExpressionMatcher.RegexMatcher("\"(?:[^\"\\\\]|\\\\.)*\""));
public static final ExpressionTokenType NUMBER_TERMINAL = new ExpressionTokenType("number", ExpressionTokenCatagory.TERMINAL,
new ExpressionMatcher.RegexMatcher("[0-9]+"));
public static final ExpressionTokenType VARIABLE_TERMINAL = new ExpressionTokenType("variable", ExpressionTokenCatagory.TERMINAL,
new ExpressionMatcher.RegexMatcher("[a-zA-Z_][a-zA-Z0-9_]*"));
// Functions
public static final ExpressionTokenType FUNCTION_TERMINAL = new ExpressionTokenType("function", ExpressionTokenCatagory.FUNCTION,
new ExpressionMatcher.RegexMatcher("[a-zA-Z_][a-zA-Z0-9_]*"));
public static final ExpressionTokenType LEFT_PAREN = new ExpressionTokenType("left_paren", ExpressionTokenCatagory.FUNCTION,
new ExpressionMatcher.ExactMatcher("("));
public static final ExpressionTokenType RIGHT_PAREN = new ExpressionTokenType("right_paren", ExpressionTokenCatagory.FUNCTION,
new ExpressionMatcher.ExactMatcher(")"));
public static final ExpressionTokenType COMMA = new ExpressionTokenType("comma", ExpressionTokenCatagory.FUNCTION,
new ExpressionMatcher.ExactMatcher(","));
// Operators
public static final ExpressionTokenType PLUS = new ExpressionTokenType("plus", ExpressionTokenCatagory.BINARY_OP, 2,
new ExpressionMatcher.ExactMatcher("+"));
public static final ExpressionTokenType MINUS = new ExpressionTokenType("minus", ExpressionTokenCatagory.BINARY_OP, 2,
new ExpressionMatcher.ExactMatcher("-"));
public static final ExpressionTokenType MULTI = new ExpressionTokenType("multiply", ExpressionTokenCatagory.BINARY_OP, 4,
new ExpressionMatcher.ExactMatcher("*"));
public static final ExpressionTokenType DIVIDE = new ExpressionTokenType("divide", ExpressionTokenCatagory.BINARY_OP, 4,
new ExpressionMatcher.ExactMatcher("/"));
public static final ExpressionTokenType MODULO = new ExpressionTokenType("modulo", ExpressionTokenCatagory.BINARY_OP, 4,
new ExpressionMatcher.ExactMatcher("%"));
public static final ExpressionTokenType NEGATIVE = new ExpressionTokenType("negative", ExpressionTokenCatagory.UNARY_OP, 3, false,
new ExpressionMatcher.ExactMatcher("-"));
private String name;
private ExpressionTokenCatagory catagory;
private int precedence;
private boolean leftAssociative;
private ExpressionMatcher matcher;
public ExpressionTokenType(String n, ExpressionTokenCatagory c, ExpressionMatcher matcher) {
this(n, c, 0, true, matcher);
}
public ExpressionTokenType(String n, ExpressionTokenCatagory c, int p, ExpressionMatcher matcher) {
this(n, c, p, true, matcher);
}
public ExpressionTokenType(String n, ExpressionTokenCatagory c, int p, boolean a, ExpressionMatcher matcher) {
this.name = n;
this.catagory = c;
this.precedence = p;
this.leftAssociative = a;
this.matcher = matcher;
}
public String getName() {
return this.name;
}
public ExpressionTokenCatagory getCatagory() {
return this.catagory;
}
public int getPrecedence() {
return this.precedence;
}
public boolean isLeftAssociative() {
return this.leftAssociative;
}
public ExpressionMatcher getMatcher() {
return this.matcher;
}
@Override
public String toString() {
return "ExpressionTokenType(name=" + this.name + ",catagory=" + this.catagory.name() + ")";
}
@Override
public boolean equals(Object o) {
if (o == this) {
return true;
}
if (!(o instanceof ExpressionTokenType)) {
return false;
}
ExpressionTokenType t = (ExpressionTokenType) o;
return this.name.equals(t.name) && this.catagory == t.catagory && this.precedence == t.precedence;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment