Created
February 21, 2018 15:27
-
-
Save Duality4Y/d9079222d3237940b1c236418e6c4740 to your computer and use it in GitHub Desktop.
craftinginterpreters scanner gist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package lox; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import static lox.TokenType.*; | |
public class Scanner | |
{ | |
private final String source; | |
private final List<Token> tokens = new ArrayList<>(); | |
private int start = 0; | |
private int current = 0; | |
private int line = 1; | |
private static final Map<String, TokenType> keywords; | |
static | |
{ | |
keywords = new HashMap<>(); | |
keywords.put("and", AND); | |
keywords.put("class", CLASS); | |
keywords.put("else", ELSE); | |
keywords.put("false", FALSE); | |
keywords.put("for", FOR); | |
keywords.put("fun", FUN); | |
keywords.put("if", IF); | |
keywords.put("nil", NIL); | |
keywords.put("or", OR); | |
keywords.put("print", PRINT); | |
keywords.put("return", RETURN); | |
keywords.put("super", SUPER); | |
keywords.put("this", THIS); | |
keywords.put("true", TRUE); | |
keywords.put("var", VAR); | |
keywords.put("while", WHILE); | |
} | |
Scanner(String source) | |
{ | |
this.source = source; | |
} | |
List<Token> scanTokens() | |
{ | |
while(!isAtEnd()) | |
{ | |
start = current; | |
scanToken(); | |
} | |
tokens.add(new Token(EOF, "", null, line)); | |
return tokens; | |
} | |
private void scanToken() | |
{ | |
char c = advance(); | |
switch(c) | |
{ | |
case '(': addToken(LEFT_PAREN); break; | |
case ')': addToken(RIGHT_PAREN); break; | |
case '{': addToken(LEFT_BRACE); break; | |
case '}': addToken(RIGHT_BRACE); break; | |
case ',': addToken(COMMA); break; | |
case '.': addToken(DOT); break; | |
case '-': addToken(MINUS); break; | |
case '+': addToken(PLUS); break; | |
case ';': addToken(SEMICOLON); break; | |
case '*': addToken(STAR); break; | |
case '!': addToken(match('=') ? BANG_EQUAL : BANG); break; | |
case '=': addToken(match('=') ? EQUAL_EQUAL : EQUAL); break; | |
case '<': addToken(match('=') ? LESS_EQUAL : LESS); break; | |
case '>': addToken(match('=') ? GREATER_EQUAL : GREATER); break; | |
case ' ': | |
case '\r': | |
case '\t': | |
break; | |
case '\n': | |
line++; | |
break; | |
case '/': | |
if(match('/')) | |
{ | |
while(peek() != '\n' && !isAtEnd()) advance(); | |
} | |
else if(match('*')) | |
{ | |
mcomment(); | |
} | |
else | |
{ | |
addToken(SLASH); | |
} | |
break; | |
case '"': string(); break; | |
default: | |
if(isDigit(c)) | |
{ | |
number(); | |
} | |
else if (isAlpha(c)) | |
{ | |
identifier(); | |
} | |
else | |
{ | |
Lox.error(line, "Unexpected character."); | |
} | |
break; | |
} | |
} | |
private void mcomment() | |
{ | |
while(!(peek() == '*' && peekNext() == '/') && !isAtEnd()) | |
{ | |
advance(); | |
if(peek() == '\n') line++; | |
// if another comment block found recurse to parse that one. | |
// do that as many times as there are inner comment blocks. | |
// one restriction even in comments /* have to match a closing */ ! | |
if(peek() == '/' && peekNext() == '*') | |
{ | |
// scanToken(); | |
mcomment(); | |
} | |
} | |
// is it the end? then it is clearly unterminated. | |
if(isAtEnd()) | |
{ | |
Lox.error(line, "Unterminated multi-line comment"); | |
return; | |
} | |
// advance for * and / | |
advance(); | |
advance(); | |
} | |
private void identifier() | |
{ | |
while(isAlphaNumeric(peek())) advance(); | |
String text = source.substring(start, current); | |
TokenType type = keywords.get(text); | |
if(type == null) type = IDENTIFIER; | |
addToken(type); | |
} | |
private boolean isAlpha(char c) | |
{ | |
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; | |
} | |
private boolean isAlphaNumeric(char c) | |
{ | |
return isAlpha(c) || isDigit(c); | |
} | |
private boolean isDigit(char c) | |
{ | |
return c >= '0' && c <= '9'; | |
} | |
private void number() | |
{ | |
while(isDigit(peek())) advance(); | |
if(peek() == '.' && isDigit(peekNext())) | |
{ | |
advance(); | |
while(isDigit(peek())) advance(); | |
} | |
addToken(NUMBER, Double.parseDouble(source.substring(start, current))); | |
} | |
private char peekNext() | |
{ | |
if(current + 1 >= source.length()) return '\0'; | |
return source.charAt(current + 1); | |
} | |
private void string() | |
{ | |
while(peek() != '"' && !isAtEnd()) | |
{ | |
if(peek() == '\n') line++; | |
advance(); | |
} | |
if(isAtEnd()) | |
{ | |
Lox.error(line, "Unterminated string."); | |
return; | |
} | |
advance(); | |
String value = source.substring(start + 1, current - 1); | |
addToken(STRING, value); | |
} | |
private char peek() | |
{ | |
if(isAtEnd()) return '\0'; | |
return source.charAt(current); | |
} | |
private boolean match(char expected) | |
{ | |
if(isAtEnd()) return false; | |
if(source.charAt(current) != expected) return false; | |
current++; | |
return true; | |
} | |
private char advance() | |
{ | |
current++; | |
return source.charAt(current - 1); | |
} | |
private void addToken(TokenType type) | |
{ | |
addToken(type, null); | |
} | |
private void addToken(TokenType type, Object literal) | |
{ | |
String text = source.substring(start, current); | |
tokens.add(new Token(type, text, literal, line)); | |
} | |
private boolean isAtEnd() | |
{ | |
return current >= source.length(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment