Created
October 2, 2011 18:51
-
-
Save k33g/1257766 to your computer and use it in GitHub Desktop.
Coffeescript version of @ariyahidayat's lexer and parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lexical analysis | |
#http://ariya.ofilabs.com/2011/08/math-evaluator-in-javascript-part1.html | |
class Lexer | |
constructor:-> | |
@expression = '' | |
@length = 0 | |
@index = 0 | |
@marker = 0 | |
reset : (str) -> | |
@expression = str | |
@length = str.length | |
@index = 0 | |
#define the types of the tokens | |
isWhiteSpace : (ch) -> | |
(ch == "\u0009") or (ch == " ") or (ch == "\u00A0") | |
isLetter : (ch) -> | |
(ch >= "a" and ch <= "z") or (ch >= "A" and ch <= "Z") | |
isDecimalDigit : (ch) -> | |
(ch >= '0') and (ch <= '9') | |
#helper | |
createToken : (type, value) -> | |
( | |
type : type | |
value: value | |
start: @marker | |
end : @index - 1 | |
) | |
#a way to advance to the next character | |
getNextChar : -> | |
ch = "\x00" | |
idx = @index | |
if idx < @length | |
ch = @expression.charAt(idx) | |
@index += 1 | |
ch | |
#a way to have a paeek at the next without advancing our position | |
peekNextChar : -> | |
idx = @index | |
if (idx < @length) then @expression.charAt(idx) else "\x00" | |
#spaces do not matter | |
#ignore white spaces and continue move forward until there is no such white space anymore | |
skipSpaces : -> | |
while @index < @length | |
ch = @peekNextChar() | |
break unless @isWhiteSpace ch | |
@getNextChar() | |
#the operators which we need to support are + - * / = ( ) | |
scanOperator : -> | |
ch = @peekNextChar() | |
if "+-*/()=".indexOf(ch) >= 0 | |
@createToken "Operator", @getNextChar() | |
else | |
undefined | |
#Deciding whether a series of characters is an identifier | |
#we allow the first character to be a letter or an underscore | |
isIdentifierStart : (ch) -> | |
(ch == "_") or @isLetter(ch) | |
isIdentifierPart : (ch) -> | |
@isIdentifierStart(ch) or @isDecimalDigit(ch) | |
#check identifier | |
scanIdentifier : -> | |
ch = @peekNextChar() | |
return undefined unless @isIdentifierStart ch | |
#undefined unless @isIdentifierStart ch | |
#if not @isIdentifierStart(ch) then return undefined | |
id = @getNextChar() | |
while true | |
ch = @peekNextChar() | |
break unless @isIdentifierPart ch | |
#if not @isIdentifierPart(ch) then break | |
id += @getNextChar() | |
@createToken "Identifier", id | |
scanNumber : -> | |
ch = @peekNextChar() | |
if (not @isDecimalDigit ch) and (ch != ".") | |
return undefined | |
number = "" | |
if ch != "." | |
number = @getNextChar() | |
while true | |
ch = @peekNextChar() | |
break unless @isDecimalDigit ch | |
number += @getNextChar() | |
if ch == "." | |
number += @getNextChar() | |
while true | |
ch = @peekNextChar() | |
break unless @isDecimalDigit ch | |
number += @getNextChar() | |
if ch == "e" or ch == "E" | |
number += @getNextChar() | |
ch = @peekNextChar() | |
if ch == "+" or ch == "-" or @isDecimalDigit ch | |
number += @getNextChar() | |
while true | |
ch = @peekNextChar() | |
break unless @isDecimalDigit ch | |
number += @getNextChar() | |
else | |
throw "Unexpected after the exponent sign" | |
@createToken "Number", number | |
next : -> | |
@skipSpaces() | |
return undefined if @index >= @length | |
token = @scanNumber() | |
return token if typeof token != "undefined" | |
token = @scanOperator() | |
return token if typeof token != "undefined" | |
token = @scanIdentifier() | |
return token if typeof token != "undefined" | |
throw "Unknown token from character " + @peekNextChar() | |
peek : -> | |
idx = @index | |
try | |
token = @next() | |
delete token.start | |
delete token.end | |
catch e | |
token = undefined | |
@index = idx | |
token | |
# --- Global Scope --- | |
root = global ? window | |
root.Lexer = Lexer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#math expression evaluator | |
#http://ariya.ofilabs.com/2011/08/math-evaluator-in-javascript-part-2.html | |
class Parser | |
lexer : new Lexer() | |
#we need a function which can match an operator | |
# If the incoming operator is the same as the expected one, then it returns true. | |
@matchOp : (token, op) -> | |
(typeof token != "undefined") and token.type == "Operator" and token.value == op | |
@parseArgumentList : -> | |
args = [] | |
while true | |
expr = Parser.parseExpression() | |
break if typeof expr == "undefined" | |
args.push expr | |
token = Parser::lexer.peek() | |
break unless Parser.matchOp(token, ",") | |
Parser::lexer.next() | |
args | |
@parseFunctionCall : (name) -> | |
args = [] | |
token = Parser::lexer.next() | |
throw "Expecting ( in a function call #{name}" unless Parser.matchOp(token, "(") | |
token = Parser::lexer.peek() | |
args = Parser.parseArgumentList() unless Parser.matchOp(token, ")") | |
token = Parser::lexer.next() | |
throw "Expecting ) in a function call #{name}" unless Parser.matchOp(token, ")") | |
FunctionCall: | |
name: name | |
args: args | |
@parsePrimary : -> | |
token = Parser::lexer.peek() | |
if token.type == "Identifier" | |
token = Parser::lexer.next() | |
if Parser.matchOp(Parser::lexer.peek(), "(") | |
return Parser.parseFunctionCall(token.value) | |
else | |
return Identifier: token.value | |
if token.type == "Number" | |
token = Parser::lexer.next() | |
return Number: token.value | |
if Parser.matchOp(token, "(") | |
Parser::lexer.next() | |
expr = Parser.parseAssignement() | |
token = Parser::lexer.next() | |
throw "Expecting )" unless Parser.matchOp(token, ")") | |
return Expression: expr | |
throw "Parse error, can not process token #{token.value} " | |
@parseUnary : -> | |
token = Parser::lexer.peek() | |
if Parser.matchOp(token, "-") or Parser.matchOp(token, "+") | |
token = Parser::lexer.next() | |
expr = Parser.parseUnary() | |
return Unary: | |
operator: token.value | |
expression: expr | |
Parser.parsePrimary() | |
@parseMultiplicative : -> | |
left = Parser.parseUnary() | |
token = Parser::lexer.peek() | |
if Parser.matchOp(token, "*") or Parser.matchOp(token, "/") | |
token = Parser::lexer.next() | |
right = Parser.parseMultiplicative() | |
return Binary: | |
operator: token.value | |
left: left | |
right: right | |
left | |
### | |
The function parseAdditive processes both addition and subtraction, | |
i.e. it creates a binary operator node. There will be two child nodes, | |
the left and the right ones. They represent the two subexpression | |
### | |
@parseAdditive : -> | |
left = Parser.parseMultiplicative() | |
token = Parser::lexer.peek() | |
if Parser.matchOp(token, "+") or Parser.matchOp(token, "-") | |
token = Parser::lexer.next() | |
right = Parser.parseAdditive() | |
return Binary: | |
operator: token.value | |
left: left | |
right: right | |
left | |
@parseAssignement: -> | |
_parseAssignement = -> | |
expr = Parser.parseAdditive() | |
if typeof expr != "undefined" and expr.Identifier | |
token = Parser::lexer.peek() | |
if Parser.matchOp(token, "=") | |
Parser::lexer.next() | |
assignement = Assignment: | |
name: expr | |
value: _parseAssignement() | |
return assignement | |
return expr | |
expr | |
_parseAssignement() | |
@parseExpression: -> | |
Parser.parseAssignement() | |
@parse : (expression) -> | |
Parser::lexer.reset expression | |
expr = Parser.parseExpression() | |
token = Parser::lexer.next() | |
if typeof token != "undefined" | |
throw "Unexpected token #{token.value}" | |
Expression: expr | |
# --- Global Scope --- | |
root = global ? window | |
root.Parser = Parser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |
<head> | |
<body></body> | |
<script src="https://raw.github.com/jashkenas/coffee-script/master/extras/coffee-script.js" type = "text/javascript"></script> | |
<script src="lexer.coffee" type = "text/coffeescript"></script> | |
<script src="parser.static.coffee" type = "text/coffeescript"></script> | |
<script type="text/coffeescript"> | |
src = "x = 45 *(67+ 8)" | |
#Use Lexer | |
lex = new Lexer() | |
lex.reset src | |
while true | |
out = lex.next() | |
break unless out | |
console.log "#{out.type} -> #{out.value}" | |
#Use Parser (lexer is instancied inside Parser) | |
console.log JSON.stringify Parser.parse "x = 45 *(67+ 8)" | |
</script> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment