Skip to content

Instantly share code, notes, and snippets.

@Sedose
Created June 16, 2025 20:08
Show Gist options
  • Select an option

  • Save Sedose/3014d40223ba0f18aef92b27e5182cf8 to your computer and use it in GitHub Desktop.

Select an option

Save Sedose/3014d40223ba0f18aef92b27e5182cf8 to your computer and use it in GitHub Desktop.
import java.util.ArrayList;
import java.util.List;
/**
 * Kinds of token the tokenizer can emit.
 *
 * <p>The declaration order is preserved deliberately so that {@code ordinal()} and
 * {@code values()} keep yielding the same results.
 */
enum TokenType {
    // Characters that may also begin a two-character operator.
    BANG, EQUAL,

    // Grouping.
    LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE,

    // Single-character punctuation and arithmetic operators.
    COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,

    // Equality and comparison operators.
    BANG_EQUAL, EQUAL_EQUAL, LESS_EQUAL, GREATER_EQUAL, LESS, GREATER,

    // End-of-input marker.
    EOF
}
/**
 * A single token.
 *
 * @param type   the kind of token
 * @param lexeme the text carried along with the token
 */
record Token(
    TokenType type,
    String lexeme
) {}
/**
 * Describes a piece of input the tokenizer did not recognise.
 *
 * @param line           the line on which the offending input was found
 * @param unexpectedChar the offending input, as a string
 */
record TokenizationError(
    int line,
    String unexpectedChar
) {}
/**
 * Outcome of a tokenization run: the tokens that were recognised plus any errors
 * that were recorded along the way.
 *
 * @param tokens the recognised tokens
 * @param errors the errors recorded while scanning
 */
record TokenizationResult(
    List<Token> tokens,
    List<TokenizationError> errors
) {}
/**
 * Single-pass scanner that turns source text into a list of {@link Token}s.
 *
 * <p>Recognises one- and two-character operators and punctuation, skips spaces,
 * tabs, carriage returns and {@code //} line comments, and counts lines on
 * {@code '\n'}. Any other character is recorded as a {@link TokenizationError}
 * and scanning continues. Use {@link #tokenize(String)}; instances are single-use
 * and never escape this class.
 */
final class Tokenizer {
    private final String source;
    private final List<Token> tokens = new ArrayList<>();
    private final List<TokenizationError> errors = new ArrayList<>();

    /** Index of the first char of the lexeme currently being scanned. */
    private int start = 0;
    /** Index of the next char to be consumed. */
    private int current = 0;
    /** Current line number, starting at 1. */
    private int line = 1;

    private Tokenizer(String source) {
        // Fail fast with a named message instead of a bare NPE deep inside the scan loop.
        this.source = java.util.Objects.requireNonNull(source, "source");
    }

    /**
     * Tokenizes {@code source} from start to end.
     *
     * @param source the text to scan; must not be {@code null}
     * @return the tokens in source order, always terminated by an {@code EOF} token,
     *     together with every unexpected-character error encountered
     * @throws NullPointerException if {@code source} is {@code null}
     */
    static TokenizationResult tokenize(String source) {
        var tokenizer = new Tokenizer(source);
        tokenizer.scanTokens();
        return new TokenizationResult(tokenizer.tokens, tokenizer.errors);
    }

    /** Scans the whole input, filling {@code tokens} and {@code errors}, then appends EOF. */
    private void scanTokens() {
        while (!isAtEnd()) {
            start = current;
            char c = advance();
            switch (c) {
                case '(' -> add(TokenType.LEFT_PAREN);
                case ')' -> add(TokenType.RIGHT_PAREN);
                case '{' -> add(TokenType.LEFT_BRACE);
                case '}' -> add(TokenType.RIGHT_BRACE);
                case ',' -> add(TokenType.COMMA);
                case '.' -> add(TokenType.DOT);
                case '-' -> add(TokenType.MINUS);
                case '+' -> add(TokenType.PLUS);
                case ';' -> add(TokenType.SEMICOLON);
                case '*' -> add(TokenType.STAR);
                case '!' -> add(match('=') ? TokenType.BANG_EQUAL : TokenType.BANG);
                case '=' -> add(match('=') ? TokenType.EQUAL_EQUAL : TokenType.EQUAL);
                case '<' -> add(match('=') ? TokenType.LESS_EQUAL : TokenType.LESS);
                case '>' -> add(match('=') ? TokenType.GREATER_EQUAL : TokenType.GREATER);
                case '/' -> {
                    if (match('/')) {
                        skipLineComment();
                    } else {
                        add(TokenType.SLASH);
                    }
                }
                case ' ', '\r', '\t' -> {
                    // Insignificant whitespace.
                }
                case '\n' -> line++;
                default -> reportUnexpected(c);
            }
        }
        tokens.add(new Token(TokenType.EOF, ""));
    }

    /**
     * Consumes the rest of a {@code //} comment. The terminating newline is left in
     * place so the main loop sees it and bumps the line counter.
     */
    private void skipLineComment() {
        while (!isAtEnd() && peek() != '\n') {
            advance();
        }
    }

    /**
     * Records an unexpected-character error for the input starting at {@code start}.
     *
     * <p>A code point outside the Basic Multilingual Plane is stored as two UTF-16
     * chars. The trailing low surrogate is consumed here so the character is
     * reported once, intact, rather than as two errors each holding half a pair.
     */
    private void reportUnexpected(char c) {
        if (Character.isHighSurrogate(c) && Character.isLowSurrogate(peek())) {
            advance();
        }
        errors.add(new TokenizationError(line, source.substring(start, current)));
    }

    /** Consumes the next char only if it equals {@code expected}; reports whether it did. */
    private boolean match(char expected) {
        if (isAtEnd() || source.charAt(current) != expected) {
            return false;
        }
        current++;
        return true;
    }

    /** Returns the next char without consuming it, or {@code '\0'} at end of input. */
    private char peek() {
        return isAtEnd() ? '\0' : source.charAt(current);
    }

    /** Consumes and returns the next char; callers must ensure input remains. */
    private char advance() {
        return source.charAt(current++);
    }

    private boolean isAtEnd() {
        return current >= source.length();
    }

    /** Appends a token of {@code type} whose lexeme is the text scanned since {@code start}. */
    private void add(TokenType type) {
        tokens.add(new Token(type, source.substring(start, current)));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment