Created
September 1, 2018 20:24
-
-
Save valkheim/1bb2495bfdafca91ef4870e40bf11e6c to your computer and use it in GitHub Desktop.
Basic java lexer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://www.giocc.com/writing-a-lexer-in-java-1-7-using-regex-named-capturing-groups.html
// javac Lexer.java && java Lexer
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal regex-driven lexer for integer arithmetic expressions.
 *
 * <p>Every {@link Token} contributes one named capturing group to a single
 * combined regular expression; the group that participated in a match
 * identifies the kind of token that was found.
 */
public class Lexer {

    /** Input lexed by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_INPUT = "11 + 22 - 33";

    /** Token categories, each paired with the regular expression that recognises it. */
    private enum Token {
        // An optional leading '-' binds to the digits, so "1-2" lexes as the
        // two numbers 1 and -2 rather than as a subtraction.
        NUMBER("-?[0-9]+"),
        // Inside a character class '|' is a literal, not alternation, so it is
        // deliberately absent; '-' is placed last so it is not read as a range.
        OPERATOR("[*/+-]"),
        SKIP("[ \t\f\r\n]+");

        private final String pattern;

        Token(String pattern) {
            this.pattern = pattern;
        }
    }

    /**
     * Alternation of all token patterns, one named group per token:
     * {@code (?<NUMBER>...)|(?<OPERATOR>...)|(?<SKIP>...)}. Compiled once
     * because it depends only on the enum constants.
     */
    private static final Pattern TOKEN_PATTERNS = buildTokenPattern();

    /** A recognised token together with the exact text it matched. */
    private static class Word {
        private final Token token;
        private final String lexeme;

        private Word(Token token, String lexeme) {
            this.token = token;
            this.lexeme = lexeme;
        }

        @Override
        public String toString() {
            return String.format("%-10s => [%s]", token.name(), lexeme);
        }
    }

    /** Builds the combined pattern; alternatives follow enum declaration order. */
    private static Pattern buildTokenPattern() {
        StringBuilder regex = new StringBuilder();
        for (Token token : Token.values()) {
            if (regex.length() > 0) {
                regex.append('|');
            }
            regex.append(String.format("(?<%s>%s)", token.name(), token.pattern));
        }
        return Pattern.compile(regex.toString());
    }

    /**
     * Splits {@code input} into words.
     *
     * <p>Matching uses {@link Matcher#find()}, so characters that belong to no
     * token pattern are passed over without being reported. {@code SKIP}
     * matches are returned like any other token.
     *
     * @param input the text to tokenise
     * @return the recognised words in the order they occur in {@code input}
     */
    private static List<Word> lex(String input) {
        List<Word> words = new ArrayList<Word>();
        Matcher matcher = TOKEN_PATTERNS.matcher(input);
        while (matcher.find()) {
            for (Token token : Token.values()) {
                String lexeme = matcher.group(token.name());
                if (lexeme != null) {
                    words.add(new Word(token, lexeme));
                    // Only one alternative takes part in a match; stop looking.
                    break;
                }
            }
        }
        return words;
    }

    /**
     * Lexes the command-line arguments (joined with single spaces) and prints
     * one word per line; with no arguments a built-in sample expression is used.
     *
     * @param args optional pieces of the expression to lex
     */
    public static void main(String[] args) {
        String input = DEFAULT_INPUT;
        if (args != null && args.length > 0) {
            StringBuilder joined = new StringBuilder(args[0]);
            for (int i = 1; i < args.length; i++) {
                joined.append(' ').append(args[i]);
            }
            input = joined.toString();
        }
        for (Word word : lex(input)) {
            System.out.println(word);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have used this code in a project where I made a programming language because it is done well. May I have your permission to use it in my project? If not then I will take my repo down. I just want to make sure I have your permission to use your code in my project. Have a good day.