Created
December 13, 2011 17:57
-
-
Save nettok/1473125 to your computer and use it in GitHub Desktop.
Recursive descent parser for mathematic expressions with AST generation by shunting-yard algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(expr). | |
-export([parse/1, eval/1]). | |
% Recursive descent parser for mathematic expressions with AST generation by shunting-yard algorithm ---------------- | |
% TODO: more error checking | |
% grammar | |
% ~~~~~~~ | |
% | |
% Expr : '~'? Term (('+'|'-') Term)* | |
% | |
% Term : Factor (('*'|'/') Factor)* | |
% | |
% Factor : (Number | '(' Expr ')') | |
parse(StrExpr) -> | |
{ok, Tokens, _} = erl_scan:string(StrExpr), | |
{RemainingTokens, TokenStack, OutputQueue} = parse_expr({Tokens, [], queue:new()}), | |
if | |
length(RemainingTokens) > 0 -> | |
erlang:error("couldn't process all input"); | |
true -> | |
FinalOutputQueue = lists:foldl(fun(Item, OQ) -> queue:in(Item, OQ) end, OutputQueue, TokenStack), | |
output_to_ast(FinalOutputQueue) | |
end. | |
parse_expr({[Token|Next], TokenStack, OutputQueue}) -> | |
case Token of | |
{'~', _} -> | |
{NewTokenStack, NewOutputQueue} = push_op('~', TokenStack, OutputQueue), | |
parse_expr_next_term(parse_term({Next, NewTokenStack, NewOutputQueue})); | |
_ -> | |
parse_expr_next_term(parse_term({[Token|Next], TokenStack, OutputQueue})) | |
end. | |
parse_expr_next_term({[], TokenStack, OutputQueue}) -> {[], TokenStack, OutputQueue}; | |
parse_expr_next_term({[Token|Next], TokenStack, OutputQueue}) -> | |
case Token of | |
{'+', _} -> | |
{NewTokenStack, NewOutputQueue} = push_op('+', TokenStack, OutputQueue), | |
parse_expr_next_term(parse_term({Next, NewTokenStack, NewOutputQueue})); | |
{'-', _} -> | |
{NewTokenStack, NewOutputQueue} = push_op('-', TokenStack, OutputQueue), | |
parse_expr_next_term(parse_term({Next, NewTokenStack, NewOutputQueue})); | |
_ -> | |
{[Token|Next], TokenStack, OutputQueue} | |
end. | |
parse_term({Tokens, TokenStack, OutputQueue}) -> parse_term_next_factor(parse_factor({Tokens, TokenStack, OutputQueue})). | |
parse_term_next_factor({[], TokenStack, OutputQueue}) -> {[], TokenStack, OutputQueue}; | |
parse_term_next_factor({[Token|Next], TokenStack, OutputQueue}) -> | |
case Token of | |
{'*', _} -> | |
{NewTokenStack, NewOutputQueue} = push_op('*', TokenStack, OutputQueue), | |
parse_term_next_factor(parse_factor({Next, NewTokenStack, NewOutputQueue})); | |
{'/', _} -> | |
{NewTokenStack, NewOutputQueue} = push_op('/', TokenStack, OutputQueue), | |
parse_term_next_factor(parse_factor({Next, NewTokenStack, NewOutputQueue})); | |
_ -> | |
{[Token|Next], TokenStack, OutputQueue} | |
end. | |
parse_factor({[Token|Next], TokenStack, OutputQueue}) -> | |
case Token of | |
{integer, _, Number} -> | |
NewOutputQueue = queue:in(Number, OutputQueue), | |
{Next, TokenStack, NewOutputQueue}; | |
{float, _, Number} -> | |
NewOutputQueue = queue:in(Number, OutputQueue), | |
{Next, TokenStack, NewOutputQueue}; | |
{'(', _} -> | |
NewTokenStack = ['('|TokenStack], | |
parse_factor_rparen(parse_expr({Next, NewTokenStack, OutputQueue})) | |
end. | |
parse_factor_rparen({[{')', _}|Next], TokenStack, OutputQueue}) -> | |
{NewTokenStack, NewOutputQueue} = push_rparen(TokenStack, OutputQueue), | |
{Next, NewTokenStack, NewOutputQueue}. | |
% -- ast functions -- | |
push_op(Op, [], OutputQueue) -> {[Op], OutputQueue}; | |
push_op(Op, [Top|TokenStack], OutputQueue) -> | |
case precedence_lte(Op, Top) of | |
true -> push_op(Op, TokenStack, queue:in(Top, OutputQueue)); | |
false -> {[Op|[Top|TokenStack]], OutputQueue} | |
end. | |
push_rparen([Top|TokenStack], OutputQueue) -> | |
case Top of | |
'(' -> {TokenStack, OutputQueue}; | |
_ -> push_rparen(TokenStack, queue:in(Top, OutputQueue)) | |
end. | |
precedence_lte(Op1, Op2) -> | |
case is_op(Op1) of | |
false -> erlang:error("Op1 must be an operator"); | |
true -> true | |
end, | |
if | |
Op2 == '~' -> true; | |
(Op2 == '*') orelse (Op2 == '/') -> | |
if | |
Op1 == '~' -> false; | |
true -> true | |
end; | |
(Op2 == '+') orelse (Op2 == '-') -> | |
if | |
(Op1 == '~') orelse (Op1 == '*') orelse (Op1 == '/') -> false; | |
true -> true | |
end; | |
true -> false % to stop 'push_op' recursion if Op2 is not a valid operator | |
end. | |
is_op(Op) -> is_binop(Op) orelse is_unop(Op). | |
is_unop(Op) -> Op == '~'. | |
is_binop(Op) -> (Op=='*') orelse (Op=='/') orelse (Op=='+') orelse (Op=='-'). | |
op_type(Op) -> | |
case is_binop(Op) of | |
true -> binary_op; | |
false -> | |
case is_unop(Op) of | |
true -> unary_op; | |
false -> none_op | |
end | |
end. | |
output_to_ast(OutputQueue) -> output_to_ast_go(OutputQueue, {}, {}, {}). | |
output_to_ast_go(OutputQueue, SP0, SP1, SP2) -> | |
{Item, NewOutputQueue} = queue:out(OutputQueue), | |
case Item of | |
{value, Token} when is_number(Token) -> | |
if | |
tuple_size(SP0) == 0 -> output_to_ast_go(NewOutputQueue, {num, Token}, SP1, SP2); | |
tuple_size(SP1) == 0 -> output_to_ast_go(NewOutputQueue, SP0, {num, Token}, SP2); | |
tuple_size(SP2) == 0 -> output_to_ast_go(NewOutputQueue, SP0, SP1, {num, Token}) | |
end; | |
{value, Token} -> | |
case op_type(Token) of | |
binary_op -> | |
if | |
tuple_size(SP2) > 0 -> output_to_ast_go(NewOutputQueue, SP0, {Token, SP1, SP2}, {}); | |
tuple_size(SP1) > 0 -> output_to_ast_go(NewOutputQueue, {Token, SP0, SP1}, {}, {}) | |
end; | |
unary_op -> | |
if | |
tuple_size(SP2) > 0 -> output_to_ast_go(NewOutputQueue, SP0, SP1, {Token, SP2}); | |
tuple_size(SP1) > 0 -> output_to_ast_go(NewOutputQueue, SP0, {Token, SP1}, SP2); | |
tuple_size(SP0) > 0 -> output_to_ast_go(NewOutputQueue, {Token, SP0}, SP1, SP2) | |
end | |
end; | |
empty -> SP0 | |
end. | |
% End of parser ------------------------------------------------------------ | |
eval(StrExpr) -> eval_op(parse(StrExpr)). | |
eval_op(Op) -> | |
case Op of | |
{num, Num} -> Num; | |
{'~', Op1} -> eval_neg(Op1); | |
{Func, Op1, Op2} -> | |
case Func of | |
'*' -> eval_mul(Op1, Op2); | |
'/' -> eval_div(Op1, Op2); | |
'+' -> eval_sum(Op1, Op2); | |
'-' -> eval_minus(Op1, Op2) | |
end | |
end. | |
eval_neg(Op) -> -(eval_op(Op)). | |
eval_mul(Op1, Op2) -> eval_op(Op1) * eval_op(Op2). | |
eval_div(Op1, Op2) -> eval_op(Op1) / eval_op(Op2). | |
eval_sum(Op1, Op2) -> eval_op(Op1) + eval_op(Op2). | |
eval_minus(Op1, Op2) -> eval_op(Op1) - eval_op(Op2). |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment