Skip to content

Instantly share code, notes, and snippets.

@k33g
Created October 2, 2011 18:51
Show Gist options
  • Save k33g/1257766 to your computer and use it in GitHub Desktop.
Save k33g/1257766 to your computer and use it in GitHub Desktop.
Coffeescript version of @ariyahidayat's lexer and parser
#lexical analysis
#http://ariya.ofilabs.com/2011/08/math-evaluator-in-javascript-part1.html
class Lexer
constructor:->
@expression = ''
@length = 0
@index = 0
@marker = 0
reset : (str) ->
@expression = str
@length = str.length
@index = 0
#define the types of the tokens
isWhiteSpace : (ch) ->
(ch == "\u0009") or (ch == " ") or (ch == "\u00A0")
isLetter : (ch) ->
(ch >= "a" and ch <= "z") or (ch >= "A" and ch <= "Z")
isDecimalDigit : (ch) ->
(ch >= '0') and (ch <= '9')
#helper
createToken : (type, value) ->
(
type : type
value: value
start: @marker
end : @index - 1
)
#a way to advance to the next character
getNextChar : ->
ch = "\x00"
idx = @index
if idx < @length
ch = @expression.charAt(idx)
@index += 1
ch
#a way to have a paeek at the next without advancing our position
peekNextChar : ->
idx = @index
if (idx < @length) then @expression.charAt(idx) else "\x00"
#spaces do not matter
#ignore white spaces and continue move forward until there is no such white space anymore
skipSpaces : ->
while @index < @length
ch = @peekNextChar()
break unless @isWhiteSpace ch
@getNextChar()
#the operators which we need to support are + - * / = ( )
scanOperator : ->
ch = @peekNextChar()
if "+-*/()=".indexOf(ch) >= 0
@createToken "Operator", @getNextChar()
else
undefined
#Deciding whether a series of characters is an identifier
#we allow the first character to be a letter or an underscore
isIdentifierStart : (ch) ->
(ch == "_") or @isLetter(ch)
isIdentifierPart : (ch) ->
@isIdentifierStart(ch) or @isDecimalDigit(ch)
#check identifier
scanIdentifier : ->
ch = @peekNextChar()
return undefined unless @isIdentifierStart ch
#undefined unless @isIdentifierStart ch
#if not @isIdentifierStart(ch) then return undefined
id = @getNextChar()
while true
ch = @peekNextChar()
break unless @isIdentifierPart ch
#if not @isIdentifierPart(ch) then break
id += @getNextChar()
@createToken "Identifier", id
scanNumber : ->
ch = @peekNextChar()
if (not @isDecimalDigit ch) and (ch != ".")
return undefined
number = ""
if ch != "."
number = @getNextChar()
while true
ch = @peekNextChar()
break unless @isDecimalDigit ch
number += @getNextChar()
if ch == "."
number += @getNextChar()
while true
ch = @peekNextChar()
break unless @isDecimalDigit ch
number += @getNextChar()
if ch == "e" or ch == "E"
number += @getNextChar()
ch = @peekNextChar()
if ch == "+" or ch == "-" or @isDecimalDigit ch
number += @getNextChar()
while true
ch = @peekNextChar()
break unless @isDecimalDigit ch
number += @getNextChar()
else
throw "Unexpected after the exponent sign"
@createToken "Number", number
next : ->
@skipSpaces()
return undefined if @index >= @length
token = @scanNumber()
return token if typeof token != "undefined"
token = @scanOperator()
return token if typeof token != "undefined"
token = @scanIdentifier()
return token if typeof token != "undefined"
throw "Unknown token from character " + @peekNextChar()
peek : ->
idx = @index
try
token = @next()
delete token.start
delete token.end
catch e
token = undefined
@index = idx
token
# --- Global Scope ---
root = global ? window
root.Lexer = Lexer
#math expression evaluator
#http://ariya.ofilabs.com/2011/08/math-evaluator-in-javascript-part-2.html
class Parser
lexer : new Lexer()
#we need a function which can match an operator
# If the incoming operator is the same as the expected one, then it returns true.
@matchOp : (token, op) ->
(typeof token != "undefined") and token.type == "Operator" and token.value == op
@parseArgumentList : ->
args = []
while true
expr = Parser.parseExpression()
break if typeof expr == "undefined"
args.push expr
token = Parser::lexer.peek()
break unless Parser.matchOp(token, ",")
Parser::lexer.next()
args
@parseFunctionCall : (name) ->
args = []
token = Parser::lexer.next()
throw "Expecting ( in a function call #{name}" unless Parser.matchOp(token, "(")
token = Parser::lexer.peek()
args = Parser.parseArgumentList() unless Parser.matchOp(token, ")")
token = Parser::lexer.next()
throw "Expecting ) in a function call #{name}" unless Parser.matchOp(token, ")")
FunctionCall:
name: name
args: args
@parsePrimary : ->
token = Parser::lexer.peek()
if token.type == "Identifier"
token = Parser::lexer.next()
if Parser.matchOp(Parser::lexer.peek(), "(")
return Parser.parseFunctionCall(token.value)
else
return Identifier: token.value
if token.type == "Number"
token = Parser::lexer.next()
return Number: token.value
if Parser.matchOp(token, "(")
Parser::lexer.next()
expr = Parser.parseAssignement()
token = Parser::lexer.next()
throw "Expecting )" unless Parser.matchOp(token, ")")
return Expression: expr
throw "Parse error, can not process token #{token.value} "
@parseUnary : ->
token = Parser::lexer.peek()
if Parser.matchOp(token, "-") or Parser.matchOp(token, "+")
token = Parser::lexer.next()
expr = Parser.parseUnary()
return Unary:
operator: token.value
expression: expr
Parser.parsePrimary()
@parseMultiplicative : ->
left = Parser.parseUnary()
token = Parser::lexer.peek()
if Parser.matchOp(token, "*") or Parser.matchOp(token, "/")
token = Parser::lexer.next()
right = Parser.parseMultiplicative()
return Binary:
operator: token.value
left: left
right: right
left
###
The function parseAdditive processes both addition and subtraction,
i.e. it creates a binary operator node. There will be two child nodes,
the left and the right ones. They represent the two subexpression
###
@parseAdditive : ->
left = Parser.parseMultiplicative()
token = Parser::lexer.peek()
if Parser.matchOp(token, "+") or Parser.matchOp(token, "-")
token = Parser::lexer.next()
right = Parser.parseAdditive()
return Binary:
operator: token.value
left: left
right: right
left
@parseAssignement: ->
_parseAssignement = ->
expr = Parser.parseAdditive()
if typeof expr != "undefined" and expr.Identifier
token = Parser::lexer.peek()
if Parser.matchOp(token, "=")
Parser::lexer.next()
assignement = Assignment:
name: expr
value: _parseAssignement()
return assignement
return expr
expr
_parseAssignement()
@parseExpression: ->
Parser.parseAssignement()
@parse : (expression) ->
Parser::lexer.reset expression
expr = Parser.parseExpression()
token = Parser::lexer.next()
if typeof token != "undefined"
throw "Unexpected token #{token.value}"
Expression: expr
# --- Global Scope ---
root = global ? window
root.Parser = Parser
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<head>
<body></body>
<script src="https://raw.github.com/jashkenas/coffee-script/master/extras/coffee-script.js" type = "text/javascript"></script>
<script src="lexer.coffee" type = "text/coffeescript"></script>
<script src="parser.static.coffee" type = "text/coffeescript"></script>
<script type="text/coffeescript">
src = "x = 45 *(67+ 8)"
#Use Lexer
lex = new Lexer()
lex.reset src
while true
out = lex.next()
break unless out
console.log "#{out.type} -> #{out.value}"
#Use Parser (lexer is instancied inside Parser)
console.log JSON.stringify Parser.parse "x = 45 *(67+ 8)"
</script>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment