Skip to content

Instantly share code, notes, and snippets.

@voidlizard
Created June 26, 2014 07:41
Show Gist options
  • Select an option

  • Save voidlizard/32b2fb18bfba2265c063 to your computer and use it in GitHub Desktop.

Select an option

Save voidlizard/32b2fb18bfba2265c063 to your computer and use it in GitHub Desktop.
picoparser.erl
-module(picoparser).
%% Public API: parser constructors, character-stream helpers, bind/2,
%% context constructors and the parse/3 entry point.
%% The atom 'maybe' is written quoted because `maybe` is a reserved word
%% whenever the maybe_expr feature is enabled (the default from OTP 27);
%% quoted and unquoted spellings denote the same atom on older releases.
-export([ str/0
        , str/1
        , str_/0
        , seq/1
        , any/1
        , whatever/0
        , many/1
        , match/1
        , 'maybe'/1
        , repeat/2
        , char/0
        , char/1
        , chars/1
        , w/0
        , digit/0
        , spaces/0
        , not_eol/0
        , bind/2
        , new_context_string/2
        , new_context_binary/2
        , parse/3
        ]).
%% Parsing context handed to every parser function.
%%   state     - caller-supplied value threaded through the parsers; it is
%%               passed to the function bound to each parser and returned
%%               in the {match | notmatch, {State, Input}} results.
%%   tokenizer - fun(Input) -> {Token, RestOfInput}.
%%   is_empty  - fun(Token) -> boolean(); true for the "no token" value.
%%   eof       - fun(Input) -> boolean(); filled in by the context
%%               constructors, not read by any function visible in this file.
-record(pctx, { state
, tokenizer
, is_empty
, eof
}).
-define(L, lists).
%% constructors
%% Parser that accepts any single non-empty token and does not hand the
%% token on to the bound function (the state is returned as is).
str_() ->
    Run = fun(Ctx, Input) -> parse_string_any(false, Ctx, Input) end,
    parser(fun id/1, Run).
%% Parser that accepts any single non-empty token and offers it to the
%% bound function; the default bound function keeps the state and drops
%% the token.
str() ->
    KeepState = fun(State, _Token) -> State end,
    Run = fun(Ctx, Input) -> parse_string_any(true, Ctx, Input) end,
    parser(KeepState, Run).
%% Parser that accepts exactly the token S (compared with =:= against the
%% tokenizer's output).
str(S) ->
    Run = fun(Ctx, Input) -> parse_string_exact(S, Ctx, Input) end,
    parser(fun id/1, Run).
%% Parser that applies every parser in the list P in order and fails as
%% soon as one of them fails.
seq(P) ->
    Run = fun(Ctx, Input) -> parse_sequence(P, Ctx, Input) end,
    parser(fun id/1, Run).
%% Parser that tries the parsers in the list P one after another on the
%% same input and yields the first match.
any(P) ->
    Run = fun(Ctx, Input) -> parse_any(P, Ctx, Input) end,
    parser(fun id/1, Run).
%% Parser that applies P repeatedly until it stops matching; it matches
%% only if P matched at least once.
many(P) ->
    Run = fun(Ctx, Input) -> parse_many(P, Ctx, Input) end,
    parser(fun id/1, Run).
%% Parser that accepts the next token when the predicate F(Token) returns
%% true. The token is offered to the bound function; the default one
%% (id/2) keeps the state and drops the token.
match(F) ->
    Run = fun(Ctx, Input) -> parse_match(F, Ctx, Input) end,
    parser(fun id/2, Run).
%% Optional parser: behaves like P when P matches, otherwise matches
%% without consuming any input.
%% The function name is quoted because `maybe` is a reserved word whenever
%% the maybe_expr feature is enabled (the default from OTP 27); an unquoted
%% `maybe(` does not parse there. Quoted and unquoted spellings are the same
%% atom on older releases, so callers are unaffected. Any other unquoted
%% mention of this name (for instance in an export list) needs the same
%% quoting on such releases.
'maybe'(P) ->
    Run = fun(Ctx, Input) -> parse_maybe(P, Ctx, Input) end,
    parser(fun id/1, Run).
%% Parser that applies P up to N times in a row (see parse_repeat/4 for the
%% exact result when P matches fewer than N times).
repeat(N, P) ->
    Run = fun(Ctx, Input) -> parse_repeat(N, P, Ctx, Input) end,
    parser(fun id/1, Run).
%% any/1 over an empty list of alternatives. With no alternative to try,
%% parse_any/3 reports notmatch and leaves the input untouched.
whatever() ->
    NoAlternatives = [],
    any(NoAlternatives).
%% functions
%% Replace the function attached to a parser with F, keeping the parsing
%% function itself. F is called with the state (and, for token-producing
%% parsers, the token) each time the parser matches.
bind({_OldFun, Run}, F) ->
    {F, Run}.
%% Build a parsing context for list-based (string) input.
%% S is the initial state, T the tokenizer fun(Input) -> {Token, Rest}.
%% Both the "empty token" and the "end of input" tests compare against [].
new_context_string(S, T) ->
    IsNil = fun([]) -> true;
               (_) -> false
            end,
    #pctx{state = S, tokenizer = T, is_empty = IsNil, eof = IsNil}.
%% Build a parsing context for binary input.
%% S is the initial state, T the tokenizer fun(Input) -> {Token, Rest}.
%% Both the "empty token" and the "end of input" tests compare against <<>>.
new_context_binary(S, T) ->
    IsEmptyBin = fun(<<>>) -> true;
                    (_) -> false
                 end,
    #pctx{state = S, tokenizer = T, is_empty = IsEmptyBin, eof = IsEmptyBin}.
%% Entry point: run Parser on Input within the context Ctx.
%% Returns {match, {State, RestInput}} or {notmatch, {State, Input}}.
parse(Parser, Ctx, Input) ->
    parser_apply(Parser, Ctx, Input).
%% misc parsers for character streams
%FIXME: move to header file
%% True when C is a space, tab, carriage return or newline character.
%% Built only from =:= and orelse, so it can be used inside guards.
-define(IS_SPACE(C),
(C =:= $\s orelse C =:= $\t
orelse C =:= $\r
orelse C =:= $\n)).
%% Any single token; an alias of str/0 for character streams.
char() ->
    str().
%% Parser for the one-character token [C].
char(C) ->
    Token = [C],
    str(Token).
%% Parser that matches the characters of S one after another, each as its
%% own char/1 parser.
chars(S) ->
    PerChar = lists:map(fun(C) -> char(C) end, S),
    seq(PerChar).
%% Parser for a one-element token whose element is not whitespace
%% (as defined by ?IS_SPACE).
not_space() ->
    match(fun([C]) when not (?IS_SPACE(C)) -> true;
             (_) -> false
          end).
%% Parser for a one-element token whose element is whitespace
%% (as defined by ?IS_SPACE).
space() ->
    match(fun([C]) when ?IS_SPACE(C) -> true;
             (_) -> false
          end).
%% Parser for a one-element token holding a decimal digit, $0 through $9.
digit() ->
    %% is_integer/1 keeps the test equivalent to matching the ten
    %% character literals one by one (a float such as 48.0 is rejected).
    match(fun([C]) when is_integer(C), C >= $0, C =< $9 -> true;
             (_) -> false
          end).
%% One or more whitespace tokens.
spaces() ->
    OneSpace = space(),
    many(OneSpace).
%% A "word": one or more non-whitespace tokens.
w() ->
    NonSpace = not_space(),
    many(NonSpace).
%% Parser for a one-element token that is anything except a newline.
not_eol() ->
    match(fun([$\n]) -> false;
             ([_]) -> true;
             (_) -> false
          end).
%% private functions
%% Default bound function for parsers that produce no token: return the
%% state unchanged.
id(State) ->
    State.
%% Default bound function for token-producing parsers: return the state
%% unchanged and ignore the token.
id(State, _Token) ->
    State.
%% Pair a bound function (called on a match) with a parsing function
%% fun(Ctx, Input); this pair is the representation of a parser.
parser(BoundFun, Run) ->
    {BoundFun, Run}.
%% Run the parser {F, X} on Input within the context Ctx.
%%
%% X(Ctx, Input) yields either
%%   {match, {{Token, State}, Rest}}  - token-producing parsers, or
%%   {match, {State, Rest}}           - all other parsers,
%% and anything else (normally {notmatch, _}) is returned untouched.
%% On a match the bound function F computes the new state: F(State, Token)
%% for the first shape, F(State) for the second.
%%
%% The two shapes cannot be told apart structurally when the plain state is
%% itself a 2-tuple, so the token shape is only assumed when F actually has
%% arity 2. Without this check a tuple-valued state was split into
%% "token" and "state" and an arity-1 F was called with two arguments.
parser_apply({F, X}, #pctx{} = Ctx, Input) ->
    case X(Ctx, Input) of
        {match, {{Token, State}, Rest}} when is_function(F, 2) ->
            {match, {F(State, Token), Rest}};
        {match, {State, Rest}} ->
            {match, {F(State), Rest}};
        Other ->
            Other
    end.
%% Take the next token, whatever it is.
%%   R = true  - report the token: {match, {{Token, State}, Rest}}
%%   R = false - drop the token:   {match, {State, Rest}}
%% An empty token (per the context's is_empty fun) is a failure. The
%% failure carries the original Input rather than the tokenizer's
%% remainder, so a notmatch never consumes input - the same convention
%% parse_string_exact/3 and parse_match/3 follow.
parse_string_any(R, #pctx{tokenizer = T, is_empty = E, state = S}, Input) ->
    {Tok, Rest} = T(Input),
    case {R, E(Tok)} of
        {_, true} -> {notmatch, {S, Input}};
        {true, false} -> {match, {{Tok, S}, Rest}};
        {false, false} -> {match, {S, Rest}}
    end.
%% Match when the next token is exactly S. On success the remaining input
%% is returned; on failure the original Input is returned untouched.
parse_string_exact(S, #pctx{tokenizer = Tokenize, state = State}, Input) ->
    {Token, Rest} = Tokenize(Input),
    case Token of
        S -> {match, {State, Rest}};
        _ -> {notmatch, {State, Input}}
    end.
%% FIXME: not return last input when no one consumes
%% (but how? arity check?)
%% Match when the predicate F accepts the next token. A match reports the
%% token together with the state; a failure returns the original Input.
%% F must return true or false; any other value raises case_clause.
parse_match(F, #pctx{tokenizer = Tokenize, state = State}, Input) ->
    {Token, Rest} = Tokenize(Input),
    Accepted = F(Token),
    case Accepted of
        true -> {match, {{Token, State}, Rest}};
        false -> {notmatch, {State, Input}}
    end.
%% Apply the parsers in the list one after another, feeding each the state
%% and remaining input left by its predecessor.
parse_sequence(Parsers, Ctx, Input) ->
    parse_sequence_rec(Parsers, Ctx, Input).

%% Worker for parse_sequence/3. The first non-match result is returned as
%% is; when the list is exhausted the accumulated state and remaining
%% input are reported as a match.
parse_sequence_rec([Next | Others], Ctx, Input) ->
    case parser_apply(Next, Ctx, Input) of
        {match, {NewState, Remaining}} ->
            parse_sequence_rec(Others, Ctx#pctx{state = NewState}, Remaining);
        Failure ->
            Failure
    end;
parse_sequence_rec([], Ctx, Input) ->
    {match, {Ctx#pctx.state, Input}}.
%% Try each alternative on the same input and context; the first match wins.
parse_any(Alternatives, Ctx, Input) ->
    Fallback = {notmatch, {Ctx#pctx.state, Input}},
    parse_any_rec(Alternatives, Ctx, Input, Fallback).

%% Worker for parse_any/3. Fallback is passed along unchanged; once the
%% alternatives run out, its tag is returned with the current state and
%% the original input.
parse_any_rec([Alt | Others], Ctx, Input, Fallback) ->
    case parser_apply(Alt, Ctx, Input) of
        {match, _} = Hit ->
            Hit;
        {notmatch, _} ->
            parse_any_rec(Others, Ctx, Input, Fallback)
    end;
parse_any_rec([], Ctx, Input, {Tag, _}) ->
    {Tag, {Ctx#pctx.state, Input}}.
%% Apply P as many times as it matches. The result is the last successful
%% match, or notmatch (with the original input) when P never matched.
%% NOTE(review): a P that matches without consuming input looks like it
%% would keep this loop going indefinitely - confirm before using many/1
%% on optional parsers.
parse_many(P, Ctx, Input) ->
    NoMatchYet = {notmatch, {Ctx#pctx.state, Input}},
    parse_many_rec(P, Ctx, Input, NoMatchYet).

%% Worker for parse_many/3; Last is the most recent successful result.
parse_many_rec(P, Ctx, Input, Last) ->
    case parser_apply(P, Ctx, Input) of
        {match, {State, Rest}} = Hit ->
            parse_many_rec(P, Ctx#pctx{state = State}, Rest, Hit);
        _ ->
            Last
    end.
%% Apply P at most N times. The result is the last successful match, which
%% may come from fewer than N repetitions; it is notmatch when P never
%% matched or when N is 0.
parse_repeat(N, P, Ctx, Input) ->
    NoMatchYet = {notmatch, {Ctx#pctx.state, Input}},
    parse_repeat_rec(N, P, Ctx, Input, NoMatchYet).

%% Worker for parse_repeat/4; Last is the most recent successful result
%% and N the number of repetitions still allowed.
parse_repeat_rec(0, _P, _Ctx, _Input, Last) ->
    Last;
parse_repeat_rec(N, P, Ctx, Input, Last) ->
    case parser_apply(P, Ctx, Input) of
        {match, {State, Rest}} = Hit ->
            parse_repeat_rec(N - 1, P, Ctx#pctx{state = State}, Rest, Hit);
        _ ->
            Last
    end.
%% Optional application of P: a match is passed through; a notmatch is
%% turned into a match that keeps the reported state and the original,
%% unconsumed input.
parse_maybe(P, Ctx, Input) ->
    case parser_apply(P, Ctx, Input) of
        {match, {_, _}} = Hit ->
            Hit;
        {notmatch, {State, _Ignored}} ->
            {match, {State, Input}}
    end.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment