Created
June 26, 2014 07:41
-
-
Save voidlizard/32b2fb18bfba2265c063 to your computer and use it in GitHub Desktop.
picoparser.erl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% picoparser: a tiny parser-combinator toolkit.
%%
%% A parser is a pair {Handler, Run} (see parser/2):
%%   Run     - fun(Ctx, Input) producing {match, ...} or {notmatch, ...}
%%   Handler - fun applied to the successful result by parser_apply/3;
%%             it is called as Handler(State) or Handler(State, Token)
%%             depending on the shape of what Run returned.
%% Input is split into tokens by the tokenizer stored in the context.
-module(picoparser).

%% NOTE: `maybe` is a reserved word when the maybe_expr feature is enabled
%% (the default from OTP 27), so the atom has to be quoted. The exported
%% function is still maybe/1; callers compiled with the feature enabled
%% must also write picoparser:'maybe'(P).
-export([ str/0
        , str/1
        , str_/0
        , seq/1
        , any/1
        , whatever/0
        , many/1
        , match/1
        , 'maybe'/1
        , repeat/2
        , char/0
        , char/1
        , chars/1
        , w/0
        , digit/0
        , spaces/0
        , not_eol/0
        , bind/2
        , new_context_string/2
        , new_context_binary/2
        , parse/3
        ]).

%% Parsing context.
%%   state     - user state threaded through the parsers
%%   tokenizer - fun(Input) -> {Token, RestInput}
%%   is_empty  - fun(Token) -> boolean(); true for the "no token" value
%%   eof       - fun(Input) -> boolean(); not called anywhere in this module
-record(pctx, { state
              , tokenizer
              , is_empty
              , eof
              }).

-define(L, lists).

%% constructors

%% Accepts any non-empty token and drops it; the handler receives the state only.
str_() -> parser(fun id/1, fun(C, I) -> parse_string_any(false, C, I) end).

%% Accepts any non-empty token and hands it to the handler as (State, Token).
%% The default handler ignores the token and returns the state unchanged.
str() -> parser(fun(S,_) -> S end, fun(C, I) -> parse_string_any(true, C, I) end).

%% Accepts only a token that is exactly equal (=:=) to S.
str(S) -> parser(fun id/1, fun(C, I) -> parse_string_exact(S, C, I) end).

%% Applies the parsers of list P one after another; all of them must match.
seq(P) -> parser(fun id/1, fun(C, I) -> parse_sequence(P, C, I) end).

%% Tries the parsers of list P in order and takes the first one that matches.
any(P) -> parser(fun id/1, fun(C, I) -> parse_any(P, C, I) end).

%% Applies P repeatedly while it matches; fails if P never matches.
many(P) -> parser(fun id/1, fun(C, I) -> parse_many(P, C, I) end).

%% Accepts a token for which the predicate F(Token) returns true and hands
%% it to the handler as (State, Token). The default handler keeps the state.
match(F) -> parser(fun id/2, fun(C, I) -> parse_match(F, C, I) end).

%% Optional parser: turns a notmatch of P into a match that consumes nothing.
'maybe'(P) -> parser(fun id/1, fun(C, I) -> parse_maybe(P, C, I) end).

%% Applies P at most N times (see the notes on parse_repeat/4 for details).
repeat(N, P) -> parser(fun id/1, fun(C, I) -> parse_repeat(N, P, C, I) end).

%% An alternative over an empty list of parsers, which parse_any/3 reports
%% as notmatch.
%% NOTE(review): the name suggests "accept anything" -- confirm the intent.
whatever() -> any([]).
| %% functions | |
%% Replaces the result handler of a parser with F and keeps its parsing fun.
%% parser_apply/3 later calls F as F(State) or F(State, Result), depending
%% on the shape of the value the parsing fun returns.
bind({_OldHandler, Run}, F) -> {F, Run}.
%% Builds a parsing context for list ("string") input.
%%   S - initial user state
%%   T - tokenizer; the parsers call it as T(Input) and expect {Token, Rest}
%% Both the empty token and the exhausted input are recognised as [].
new_context_string(S, T) ->
    IsNil = fun([]) -> true;
               (_)  -> false
            end,
    #pctx{state = S, tokenizer = T, is_empty = IsNil, eof = IsNil}.
%% Builds a parsing context for binary input.
%%   S - initial user state
%%   T - tokenizer; the parsers call it as T(Input) and expect {Token, Rest}
%% Both the empty token and the exhausted input are recognised as <<>>.
new_context_binary(S, T) ->
    IsEmptyBin = fun(<<>>) -> true;
                    (_)    -> false
                 end,
    #pctx{state = S, tokenizer = T, is_empty = IsEmptyBin, eof = IsEmptyBin}.
%% Public entry point: runs Parser on Input within context Ctx and returns
%% the result of parser_apply/3 unchanged.
parse(Parser, Ctx, Input) -> parser_apply(Parser, Ctx, Input).
| %% misc parsers for character streams | |
%% FIXME: move to header file
%% True when C is a space, tab, carriage return or newline character.
%% Uses only comparison operators and orelse, so it is usable in guards.
-define(IS_SPACE(C),
        (C =:= $\s orelse
         C =:= $\t orelse
         C =:= $\r orelse
         C =:= $\n)).
%% char/0: accepts any non-empty token (same parser as str/0).
char() -> str().

%% char/1: accepts exactly the token [C].
char(C) -> str([C]).

%% chars/1: a sequence of char/1 parsers, one per element of S, in order.
chars(S) -> seq([char(C) || C <- S]).
%% Accepts a one-element list token whose element is not a whitespace
%% character as defined by ?IS_SPACE. Any other token shape is rejected.
not_space() ->
    match(fun([C]) when not ?IS_SPACE(C) -> true;
             (_) -> false
          end).
%% Accepts a one-element list token whose element is a whitespace character
%% as defined by ?IS_SPACE. Any other token shape is rejected.
space() ->
    match(fun([C]) when ?IS_SPACE(C) -> true;
             (_) -> false
          end).
%% Accepts a one-element list token holding an ASCII decimal digit ($0..$9).
%% The is_integer/1 test keeps floats such as 48.0 rejected, exactly as
%% matching against the literal characters would.
digit() ->
    match(fun([C]) when is_integer(C), C >= $0, C =< $9 -> true;
             (_) -> false
          end).
%% A run of whitespace tokens. Built on many/1, so it fails when there is
%% not at least one whitespace token.
spaces() ->
    Space = space(),
    many(Space).

%% A run of non-whitespace tokens (a "word"). Built on many/1, so it fails
%% when there is not at least one such token.
w() ->
    NotSpace = not_space(),
    many(NotSpace).
%% Accepts a one-element list token whose element is not the newline
%% character. Any other token shape is rejected.
not_eol() ->
    match(fun([C]) when C =/= $\n -> true;
             (_) -> false
          end).
| %% private functions | |
%% Identity handler for parsers whose result carries the state only.
id(State) -> State.

%% Handler for parsers whose result carries (State, Token): keeps the state
%% and ignores the token.
id(State, _Token) -> State.

%% Packs a result handler and a parsing fun into the parser pair.
parser(Handler, Run) -> {Handler, Run}.
%% Runs the parsing fun of a parser and post-processes a successful result
%% with the parser's handler. The context must be a #pctx{} record.
parser_apply({Handler, Run}, #pctx{} = Ctx, Input) ->
    parser_apply_result(Handler, Run(Ctx, Input)).

%% Applies Handler to a match and passes everything else through untouched.
%%   {match, {{Result, State}, Rest}} -> Handler(State, Result)
%%   {match, {State, Rest}}           -> Handler(State)
%% NOTE: a plain state that is itself a 2-tuple is taken by the first clause
%% and therefore treated as {Result, State}.
parser_apply_result(Handler, {match, {{Result, State}, Rest}}) ->
    {match, {Handler(State, Result), Rest}};
parser_apply_result(Handler, {match, {State, Rest}}) ->
    {match, {Handler(State), Rest}};
parser_apply_result(_Handler, Other) ->
    Other.
%% Reads one token and accepts it unless the context's is_empty fun says it
%% is empty.
%%   R = true  -> the token is returned together with the state, as
%%                {match, {{Token, State}, Rest}}
%%   R = false -> only the state is returned, as {match, {State, Rest}}
%% On an empty token the result is {notmatch, {State, Input}} with the
%% ORIGINAL input, so a failed parser never reports consumed input. This
%% matches what parse_string_exact/3 and parse_match/3 do on failure.
parse_string_any(R, #pctx{tokenizer = T, is_empty = E, state = S}, Input) ->
    {Tok, Rest} = T(Input),
    case {R, E(Tok)} of
        {true, false}  -> {match, {{Tok, S}, Rest}};
        {false, false} -> {match, {S, Rest}};
        {_, true}      -> {notmatch, {S, Input}}
    end.
%% Reads one token and accepts it only when it is exactly equal to S.
%% Success: {match, {State, Rest}}; failure: {notmatch, {State, Input}}
%% with the input left as it was before tokenizing.
parse_string_exact(S, #pctx{tokenizer = T, state = St}, Input) ->
    {Tok, Rest} = T(Input),
    if
        Tok =:= S -> {match, {St, Rest}};
        true      -> {notmatch, {St, Input}}
    end.
| %% FIXME: not return last input when no one consumes | |
| %% (but how? arity check?) | |
%% Reads one token and accepts it when the predicate F(Token) returns true.
%% Success: {match, {{Token, State}, Rest}}, so the handler receives the
%% token; failure: {notmatch, {State, Input}} with the input untouched.
%% F must return true or false; anything else raises case_clause.
parse_match(F, #pctx{tokenizer = T, state = St}, Input) ->
    {Tok, Rest} = T(Input),
    Accepted = F(Tok),
    case Accepted of
        true  -> {match, {{Tok, St}, Rest}};
        false -> {notmatch, {St, Input}}
    end.
%% Applies every parser of the list in order, threading state and input
%% from one to the next.
parse_sequence(Parsers, Ctx, Input) ->
    parse_sequence_rec(Parsers, Ctx, Input).

%% All parsers succeeded: report the accumulated state and remaining input.
parse_sequence_rec([], Ctx, Input) ->
    {match, {Ctx#pctx.state, Input}};
%% Run the next parser; the first result that is not a match is returned
%% as is, carrying the state and input reported by the failing parser.
parse_sequence_rec([Parser | Remaining], Ctx, Input) ->
    case parser_apply(Parser, Ctx, Input) of
        {match, {NextState, NextInput}} ->
            parse_sequence_rec(Remaining, Ctx#pctx{state = NextState}, NextInput);
        Failure ->
            Failure
    end.
%% Ordered choice: tries each parser of the list against the same context
%% and input and returns the first match.
parse_any(Alternatives, Ctx, Input) ->
    Fallback = {notmatch, {Ctx#pctx.state, Input}},
    parse_any_rec(Alternatives, Ctx, Input, Fallback).

%% No alternative left: report the fallback tag with the original state and
%% input, so nothing from the failed alternatives leaks out.
parse_any_rec([], Ctx, Input, {Tag, _}) ->
    {Tag, {Ctx#pctx.state, Input}};
parse_any_rec([Parser | Remaining], Ctx, Input, Fallback) ->
    case parser_apply(Parser, Ctx, Input) of
        {match, _} = Hit -> Hit;
        {notmatch, _}    -> parse_any_rec(Remaining, Ctx, Input, Fallback)
    end.
%% Applies P over and over while it matches and returns the last match.
%% If P does not match even once the result is notmatch with the original
%% state and input.
parse_many(P, Ctx, Input) ->
    Initial = {notmatch, {Ctx#pctx.state, Input}},
    parse_many_rec(P, Ctx, Input, Initial).

%% Last holds the most recent successful result (or the initial notmatch).
%% NOTE: a parser that matches without consuming input never terminates here.
parse_many_rec(P, Ctx, Input, Last) ->
    case parser_apply(P, Ctx, Input) of
        {match, {State, Rest}} = Hit ->
            parse_many_rec(P, Ctx#pctx{state = State}, Rest, Hit);
        _ ->
            Last
    end.
%% Applies P up to N times and returns the last successful result.
%% With N = 0, or when P fails on the first attempt, the result is notmatch
%% with the original state and input.
%% NOTE(review): if P stops matching after fewer than N repetitions the
%% partial result is still returned as a match -- confirm this is intended
%% rather than "exactly N times".
parse_repeat(N, P, Ctx, Input) ->
    Initial = {notmatch, {Ctx#pctx.state, Input}},
    parse_repeat_rec(N, P, Ctx, Input, Initial).

%% Counter exhausted: hand back the most recent result.
parse_repeat_rec(0, _P, _Ctx, _Input, Last) ->
    Last;
parse_repeat_rec(N, P, Ctx, Input, Last) ->
    case parser_apply(P, Ctx, Input) of
        {match, {State, Rest}} = Hit ->
            parse_repeat_rec(N - 1, P, Ctx#pctx{state = State}, Rest, Hit);
        _ ->
            Last
    end.
%% Optional parser: a match of P is returned unchanged, a notmatch becomes
%% a match that consumes no input.
%% NOTE(review): the state of the converted result comes from the notmatch
%% tuple, not from the context; for a failed sequence that is the state
%% after its successful prefix -- confirm this is intended.
parse_maybe(P, Ctx, Input) ->
    case parser_apply(P, Ctx, Input) of
        {match, {_, _}} = Hit ->
            Hit;
        {notmatch, {State, _Ignored}} ->
            {match, {State, Input}}
    end.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment