Last active
July 30, 2019 00:55
-
-
Save cararemixed/388336fa2e0ca71bbcafb0bff291638b to your computer and use it in GitHub Desktop.
Parsing with derivatives in Erlang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(deriv). | |
-compile([export_all]). | |
nullable({rep, _}) -> true; | |
nullable({alt, Alts}) -> lists:any(fun nullable/1, Alts); | |
nullable(L) when is_list(L) -> lists:all(fun nullable/1, L); | |
nullable(_) -> false. | |
d(_, empty) -> empty; | |
d(_, []) -> empty; | |
d(C, C) -> []; | |
d(_, C) when is_integer(C) -> empty; | |
d(C, {alt, A}) -> {alt, [d(C, L) || L <- A]}; | |
d(C, {rep, R}) -> [d(C, R), {rep, R}]; | |
d(C, [L]) -> d(C, L); | |
d(C, [L | Rest]) -> | |
case nullable(L) of | |
true -> {alt, [[d(C, L) | Rest], d(C, Rest)]}; | |
false -> [d(C, L) | Rest] | |
end. | |
match(Str, Lang) -> | |
nullable(lists:foldl(fun d/2, Lang, Str)). | |
%% Match the regexp eqivalent of abcd?x* | |
%% deriv:match("abcxxx", ["abc", {alt, ["d", ""]}, {rep, "x"}]). => true |
Since this was discussed in short form on twitter, I'll just drop the link here for now: https://twitter.com/wvmdltr/status/813432550597283845
I might write a full parser framework using this technique at some point but I wanted to see how easy it would be to boil down the basic idea behind the algorithm. I encode the null language as an empty list and a concatenation as a non-empty list. A single element list is the same as the single element outside of the list which explains line 15.
lists:all/2
returns true for empty lists which is why I don't need a clause to match the null language in the nullable/1
function. The alt also takes a list of alternates rather than just a pair like certain implementations you might see.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey. Could you please explain how this is a derivative? I don't see the connection. Thanks :)