Skip to content

Instantly share code, notes, and snippets.

@Duality4Y
Created February 21, 2018 15:27
Show Gist options
  • Save Duality4Y/d9079222d3237940b1c236418e6c4740 to your computer and use it in GitHub Desktop.
Save Duality4Y/d9079222d3237940b1c236418e6c4740 to your computer and use it in GitHub Desktop.
Crafting Interpreters scanner gist
package lox;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static lox.TokenType.*;
/**
 * Turns a string of Lox source code into a list of {@link Token}s.
 *
 * <p>The scanner walks the source one character at a time. {@code start} marks
 * the first character of the lexeme being scanned and {@code current} the next
 * character to consume; the text between them becomes the token's lexeme.
 * Lexical errors are reported through {@code Lox.error} and scanning continues
 * with the following character.
 *
 * <p>Besides line comments, block comments (slash-star ... star-slash) are
 * supported and may be nested; every opener inside a block comment must be
 * matched by its own closer.
 */
public class Scanner
{
    private final String source;
    private final List<Token> tokens = new ArrayList<>();
    // Index of the first character of the lexeme currently being scanned.
    private int start = 0;
    // Index of the next character that advance() will consume.
    private int current = 0;
    // Line counter, starting at 1 and bumped for every newline consumed;
    // attached to tokens and error reports.
    private int line = 1;

    // Reserved words; an identifier whose text is found here gets the mapped type.
    private static final Map<String, TokenType> keywords;

    static
    {
        keywords = new HashMap<>();
        keywords.put("and", AND);
        keywords.put("class", CLASS);
        keywords.put("else", ELSE);
        keywords.put("false", FALSE);
        keywords.put("for", FOR);
        keywords.put("fun", FUN);
        keywords.put("if", IF);
        keywords.put("nil", NIL);
        keywords.put("or", OR);
        keywords.put("print", PRINT);
        keywords.put("return", RETURN);
        keywords.put("super", SUPER);
        keywords.put("this", THIS);
        keywords.put("true", TRUE);
        keywords.put("var", VAR);
        keywords.put("while", WHILE);
    }

    /**
     * Creates a scanner over the given source text.
     *
     * @param source the complete source code to tokenize
     */
    Scanner(String source)
    {
        this.source = source;
    }

    /**
     * Scans the whole source and returns the resulting tokens.
     *
     * @return the scanner's token list, terminated by an {@code EOF} token
     */
    List<Token> scanTokens()
    {
        while(!isAtEnd())
        {
            // Each iteration scans exactly one lexeme starting here.
            start = current;
            scanToken();
        }
        tokens.add(new Token(EOF, "", null, line));
        return tokens;
    }

    /**
     * Consumes one lexeme starting at {@code current} and, unless it is
     * whitespace, a comment, or an error, appends the matching token.
     */
    private void scanToken()
    {
        char c = advance();
        switch(c)
        {
            // Single-character tokens.
            case '(': addToken(LEFT_PAREN); break;
            case ')': addToken(RIGHT_PAREN); break;
            case '{': addToken(LEFT_BRACE); break;
            case '}': addToken(RIGHT_BRACE); break;
            case ',': addToken(COMMA); break;
            case '.': addToken(DOT); break;
            case '-': addToken(MINUS); break;
            case '+': addToken(PLUS); break;
            case ';': addToken(SEMICOLON); break;
            case '*': addToken(STAR); break;

            // Operators that may be followed by '=' to form a two-character token.
            case '!': addToken(match('=') ? BANG_EQUAL : BANG); break;
            case '=': addToken(match('=') ? EQUAL_EQUAL : EQUAL); break;
            case '<': addToken(match('=') ? LESS_EQUAL : LESS); break;
            case '>': addToken(match('=') ? GREATER_EQUAL : GREATER); break;

            // Whitespace produces no token.
            case ' ':
            case '\r':
            case '\t':
                break;
            case '\n':
                line++;
                break;

            case '/':
                if(match('/'))
                {
                    // Line comment: skip up to (not including) the newline so
                    // the '\n' case above still counts the line.
                    while(peek() != '\n' && !isAtEnd()) advance();
                }
                else if(match('*'))
                {
                    mcomment();
                }
                else
                {
                    addToken(SLASH);
                }
                break;

            case '"': string(); break;

            default:
                if(isDigit(c))
                {
                    number();
                }
                else if(isAlpha(c))
                {
                    identifier();
                }
                else
                {
                    Lox.error(line, "Unexpected character.");
                }
                break;
        }
    }

    /**
     * Skips a block comment whose opening delimiter has already been consumed.
     *
     * <p>Block comments nest: each opener found inside the comment raises the
     * nesting depth and each closer lowers it; the comment ends when the depth
     * returns to zero. Newlines inside the comment are counted so later line
     * numbers stay correct. If the source ends first, a single
     * "Unterminated multi-line comment" error is reported.
     */
    private void mcomment()
    {
        // scanToken() consumed the outermost opener, so we start one level deep.
        int depth = 1;
        while(depth > 0 && !isAtEnd())
        {
            // Classify each character as it is consumed, so that neither a
            // newline nor a nested opener directly after the opening
            // delimiter can be skipped unnoticed.
            char c = advance();
            if(c == '\n')
            {
                line++;
            }
            else if(c == '/' && peek() == '*')
            {
                // Nested opener: consume its '*' as well so it cannot be
                // paired with a following '/' and mistaken for a closer.
                advance();
                depth++;
            }
            else if(c == '*' && peek() == '/')
            {
                advance();
                depth--;
            }
        }
        // Source ran out while a comment was still open.
        if(depth > 0)
        {
            Lox.error(line, "Unterminated multi-line comment");
        }
    }

    /**
     * Scans the rest of an identifier or keyword whose first character has
     * already been consumed, and adds the corresponding token.
     */
    private void identifier()
    {
        while(isAlphaNumeric(peek())) advance();
        String text = source.substring(start, current);
        // Reserved words win over plain identifiers.
        TokenType type = keywords.get(text);
        if(type == null) type = IDENTIFIER;
        addToken(type);
    }

    /** Returns true for ASCII letters and underscore. */
    private boolean isAlpha(char c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    }

    /** Returns true for ASCII letters, ASCII digits and underscore. */
    private boolean isAlphaNumeric(char c)
    {
        return isAlpha(c) || isDigit(c);
    }

    /** Returns true for the ASCII digits '0' through '9'. */
    private boolean isDigit(char c)
    {
        return c >= '0' && c <= '9';
    }

    /**
     * Scans the rest of a number literal whose first digit has already been
     * consumed, and adds a {@code NUMBER} token carrying its value as a
     * {@code Double}.
     */
    private void number()
    {
        while(isDigit(peek())) advance();
        // A '.' only belongs to the number when a digit follows it; otherwise
        // it is left for scanToken() to handle.
        if(peek() == '.' && isDigit(peekNext()))
        {
            advance();
            while(isDigit(peek())) advance();
        }
        addToken(NUMBER, Double.parseDouble(source.substring(start, current)));
    }

    /**
     * Returns the character after the next one without consuming anything,
     * or {@code '\0'} when there is no such character.
     */
    private char peekNext()
    {
        if(current + 1 >= source.length()) return '\0';
        return source.charAt(current + 1);
    }

    /**
     * Scans a string literal whose opening quote has already been consumed.
     * Strings may span lines. Adds a {@code STRING} token whose literal is
     * the text between the quotes, or reports "Unterminated string." when
     * the source ends before the closing quote.
     */
    private void string()
    {
        while(peek() != '"' && !isAtEnd())
        {
            if(peek() == '\n') line++;
            advance();
        }
        if(isAtEnd())
        {
            Lox.error(line, "Unterminated string.");
            return;
        }
        // Consume the closing quote.
        advance();
        // Strip the surrounding quotes from the literal value.
        String value = source.substring(start + 1, current - 1);
        addToken(STRING, value);
    }

    /**
     * Returns the next character without consuming it, or {@code '\0'} at the
     * end of the source.
     */
    private char peek()
    {
        if(isAtEnd()) return '\0';
        return source.charAt(current);
    }

    /**
     * Consumes the next character only if it equals {@code expected}.
     *
     * @param expected the character to look for
     * @return true if the character was present and has been consumed
     */
    private boolean match(char expected)
    {
        if(isAtEnd()) return false;
        if(source.charAt(current) != expected) return false;
        current++;
        return true;
    }

    /** Consumes and returns the next character; callers must not be at the end. */
    private char advance()
    {
        current++;
        return source.charAt(current - 1);
    }

    /** Adds a token of the given type with no literal value. */
    private void addToken(TokenType type)
    {
        addToken(type, null);
    }

    /**
     * Adds a token of the given type for the current lexeme
     * ({@code source[start, current)}), tagged with the current line.
     *
     * @param type    the token type
     * @param literal the literal value, or {@code null} if the token has none
     */
    private void addToken(TokenType type, Object literal)
    {
        String text = source.substring(start, current);
        tokens.add(new Token(type, text, literal, line));
    }

    /** Returns true once every character of the source has been consumed. */
    private boolean isAtEnd()
    {
        return current >= source.length();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment