Start of lexer for Wren in Wren
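The file below defines a Token class, a set of token-type constants, and a Lexer class whose tokenize method returns a fiber that yields one Token per call; a short driver at the end tokenizes a sample string and prints each token.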
// A single lexed token: its type and the source text it was lexed from.
class Token {
  new(type, text) {
    _type = type
    _text = text
  }

  type { _type }
  text { _text }

  toString { _text + " " + _type }
}
// Punctuators.
var tokenLeftParen = "leftParen"
var tokenRightParen = "rightParen"
var tokenLeftBracket = "leftBracket"
var tokenRightBracket = "rightBracket"
var tokenLeftBrace = "leftBrace"
var tokenRightBrace = "rightBrace"
var tokenColon = "colon"
var tokenDot = "dot"
var tokenDotDot = "dotDot"
var tokenDotDotDot = "dotDotDot"
var tokenComma = "comma"
var tokenStar = "star"
var tokenSlash = "slash"
var tokenPercent = "percent"
var tokenPlus = "plus"
var tokenMinus = "minus"
var tokenPipe = "pipe"
var tokenPipePipe = "pipePipe"
var tokenAmp = "amp"
var tokenAmpAmp = "ampAmp"
var tokenBang = "bang"
var tokenTilde = "tilde"
var tokenEqual = "equal"
var tokenLess = "less"
var tokenGreater = "greater"
var tokenLessEqual = "lessEqual"
var tokenGreaterEqual = "greaterEqual"
var tokenEqualEqual = "equalEqual"
var tokenBangEqual = "bangEqual"
// Keywords.
var tokenBreak = "break"
var tokenClass = "class"
var tokenElse = "else"
var tokenFalse = "false"
var tokenFor = "for"
var tokenIf = "if"
var tokenIn = "in"
var tokenIs = "is"
var tokenNew = "new"
var tokenNull = "null"
var tokenReturn = "return"
var tokenStatic = "static"
var tokenSuper = "super"
var tokenThis = "this"
var tokenTrue = "true"
var tokenVar = "var"
var tokenWhile = "while"
// TOKEN_FIELD
// TOKEN_STATIC_FIELD

// Identifiers, literals, and other special tokens.
var tokenName = "name"
var tokenNumber = "number"
var tokenString = "string"
var tokenLine = "line"
var tokenError = "error"
var tokenEof = "eof"
class Lexer {
  new(source) {
    _source = source
    _start = 0
    _current = 0
  }

  // Returns a fiber that yields one Token per call until the source is
  // exhausted.
  tokenize {
    return new Fiber {
      while (_current < _source.count) {
        skipSpace

        // Skipping trailing whitespace may have consumed the rest of the
        // source, in which case there is nothing left to tokenize.
        if (_current >= _source.count) break

        _start = _current
        // TODO: A map or switch would be nice.
if (match("(")) { | |
makeToken(tokenLeftParen) | |
} else if (match(")")) { | |
makeToken(tokenRightParen) | |
} else if (match("[")) { | |
makeToken(tokenLeftBracket) | |
} else if (match("]")) { | |
makeToken(tokenRightBracket) | |
} else if (match("{")) { | |
makeToken(tokenLeftBrace) | |
} else if (match("}")) { | |
makeToken(tokenRightBrace) | |
} else if (match(":")) { | |
makeToken(tokenColon) | |
} else if (match(".")) { | |
if (match(".")) { | |
if (match(".")) { | |
makeToken(tokenDotDotDot) | |
} else { | |
makeToken(tokenDotDot) | |
} | |
} else { | |
makeToken(tokenDot) | |
} | |
} else if (match(",")) { | |
makeToken(tokenComma) | |
} else if (match("*")) { | |
makeToken(tokenStar) | |
} else if (match("/")) { | |
makeToken(tokenSlash) | |
} else if (match("%")) { | |
makeToken(tokenPercent) | |
} else if (match("+")) { | |
makeToken(tokenPlus) | |
} else if (match("-")) { | |
makeToken(tokenMinus) | |
} else if (match("|")) { | |
if (match("|")) { | |
makeToken(tokenPipePipe) | |
} else { | |
makeToken(tokenPipe) | |
} | |
} else if (match("&")) { | |
if (match("&")) { | |
makeToken(tokenAmpAmp) | |
} else { | |
makeToken(tokenAmp) | |
} | |
} else if (match("!")) { | |
if (match("=")) { | |
makeToken(tokenBangEqual) | |
} else { | |
makeToken(tokenBang) | |
} | |
} else if (match("~")) { | |
makeToken(tokenTilde) | |
} else if (match("=")) { | |
if (match("=")) { | |
makeToken(tokenEqualEqual) | |
} else { | |
makeToken(tokenEqual) | |
} | |
} else if (match("<")) { | |
if (match("=")) { | |
makeToken(tokenLessEqual) | |
} else { | |
makeToken(tokenLess) | |
} | |
} else if (match(">")) { | |
if (match("=")) { | |
makeToken(tokenGreaterEqual) | |
} else { | |
makeToken(tokenGreater) | |
} | |
} else if ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(peek)) { | |
// TODO: Better way to compare characters! | |
readName | |
} else { | |
// TODO: Do something better here. | |
advance | |
makeToken(tokenError) | |
} | |
} | |
_start = _current | |
makeToken(tokenEof) | |
} | |
} | |
  // Advances past the current character.
  advance {
    _current = _current + 1
  }

  // Gets the current character.
  peek { _source[_current] }

  // Consumes the current character if it is [c].
  match(c) {
    if (_current < _source.count && _source[_current] == c) {
      _current = _current + 1
      return true
    }

    return false
  }

  // Creates a token of [type] from the current character range.
  makeToken(type) {
    // TODO: Substring method.
    var text = ""
    for (i in _start..._current) {
      text = text + _source[i]
    }

    Fiber.yield(new Token(type, text))
  }

  // Skips over whitespace characters.
  skipSpace {
    while (match(" ") || match("\t")) {
      // Already advanced.
    }
  }
  // Reads an identifier or keyword token.
  readName {
    advance
    while (_current < _source.count && "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789".contains(peek)) {
      advance
    }

    var type = tokenName

    // TODO: Unify with makeToken.
    var text = ""
    for (i in _start..._current) {
      text = text + _source[i]
    }

    if (text == "break") {
      type = tokenBreak
    } else if (text == "class") {
      type = tokenClass
    } else if (text == "else") {
      type = tokenElse
    } else if (text == "false") {
      type = tokenFalse
    } else if (text == "for") {
      type = tokenFor
    } else if (text == "if") {
      type = tokenIf
    } else if (text == "in") {
      type = tokenIn
    } else if (text == "is") {
      type = tokenIs
    } else if (text == "new") {
      type = tokenNew
    } else if (text == "null") {
      type = tokenNull
    } else if (text == "return") {
      type = tokenReturn
    } else if (text == "static") {
      type = tokenStatic
    } else if (text == "super") {
      type = tokenSuper
    } else if (text == "this") {
      type = tokenThis
    } else if (text == "true") {
      type = tokenTrue
    } else if (text == "var") {
      type = tokenVar
    } else if (text == "while") {
      type = tokenWhile
    }

    Fiber.yield(new Token(type, text))
  }
}
var s = "()(([ .foo_BAR123:..,... ]%|||&&& { \t}!~)+-*/=!===<><=>=\n" +
        "break class else false for if in is new null return static super this true var while"

var lexer = new Lexer(s)
var tokens = lexer.tokenize
while (true) {
  var token = tokens.call
  if (tokens.isDone) break
  IO.print(token)
}
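Each token prints via toString as its text followed by its type, so the driver above emits one line per token, such as "( leftParen" and "break break", finishing with an eof token.

An obvious next step, not part of the gist, is number literals: tokenNumber is declared but never produced. Below is a minimal sketch assuming the same 2015-era Wren syntax as the rest of the file; readNumber and the extra dispatch branch are hypothetical additions that simply mirror readName.

  // Hypothetical addition inside class Lexer: reads a run of digits and
  // yields it as a tokenNumber token.
  readNumber {
    advance
    while (_current < _source.count && "0123456789".contains(peek)) {
      advance
    }
    makeToken(tokenNumber)
  }

  // A matching branch would go in tokenize, just before the error case:
  //   } else if ("0123456789".contains(peek)) {
  //     readNumber
  //   } else { ...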