Last active
June 28, 2024 16:17
-
-
Save chadfennell/71fe394cb76bdb69b1bbe841a85a0f80 to your computer and use it in GitHub Desktop.
From: Saša Jurić - Parsing from first principles - WebCamp Zagreb 2019
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Parsie do
  @moduledoc """
  A tiny parser-combinator toolkit built from first principles, together with a
  parser for a minimal subset of SQL `SELECT` statements (including nested
  subqueries in the `FROM` clause).

  From this talk: https://www.youtube.com/watch?v=xNzoerDljjo

  A *parser* is a one-argument function. It receives the remaining input as a
  binary and returns `{:ok, term, rest}` on success (where `rest` is the
  unconsumed input) or `{:error, reason}` on failure. Combinators are functions
  that build new parsers out of existing ones.
  """

  @typedoc "A parser: consumes a prefix of the input and yields a term, or fails."
  @type parser :: (binary() -> {:ok, term(), binary()} | {:error, String.t()})

  @doc """
  Parses a hard-coded sample query with two levels of nested subqueries.

  Returns `{:ok, statement, rest}` where `statement` is a map with the keys
  `:statement`, `:columns` and `:from`, and `rest` is the unparsed remainder of
  the input.
  """
  @spec run() :: {:ok, map(), binary()} | {:error, String.t()}
  def run do
    # Whitespace (including newlines) between tokens is skipped by `token/1`,
    # so the layout of this literal does not affect the parse result.
    input = "select col1 from (
      select col2, col3 from
      (
        select foo from some_table
      )
    )"

    parse(input)
  end

  @doc """
  Returns a parser for `select <columns> from <table | (subquery)>`.

  On success the parsed term is
  `%{statement: :select, columns: [String.t()], from: String.t() | map()}`,
  where `:from` is either a table name or the map of a nested statement.
  """
  @spec select_statement() :: parser()
  def select_statement do
    sequence([
      keyword(:select),
      columns(),
      keyword(:from),
      choice([token(identifier()), subquery()])
    ])
    |> map(fn [_select, column_names, _from, source] ->
      %{statement: :select, columns: column_names, from: source}
    end)
  end

  @doc """
  Returns a parser for a parenthesised `SELECT` statement.

  The parsed term is the inner statement; the parentheses are discarded.
  """
  @spec subquery() :: parser()
  def subquery do
    sequence([
      token(char(?()),
      # `select_statement/0` and `subquery/0` are mutually recursive; deferring
      # the construction until parse time keeps parser construction finite.
      lazy(fn -> select_statement() end),
      token(char(?)))
    ])
    |> map(fn [_open, statement, _close] -> statement end)
  end

  @doc """
  Wraps a zero-arity function that builds a parser so that the parser is only
  constructed when input is actually fed to it.
  """
  @spec lazy((-> parser())) :: parser()
  def lazy(combinator) do
    fn input ->
      parser = combinator.()
      parser.(input)
    end
  end

  # Runs the top-level statement parser against `input`.
  defp parse(input) do
    parser = select_statement()
    parser.(input)
  end

  # Parses the given keyword (an atom) case-insensitively and yields the atom.
  defp keyword(expected) do
    # Computed once when the parser is built rather than on every parse.
    expected_upcased = expected |> to_string() |> String.upcase()

    identifier()
    |> token()
    |> satisfy(fn identifier -> String.upcase(identifier) == expected_upcased end)
    |> map(fn _identifier -> expected end)
  end

  # Wraps `parser` so that ignorable whitespace on both sides is skipped.
  defp token(parser) do
    sequence([
      many(ignored_char()),
      parser,
      many(ignored_char())
    ])
    |> map(fn [_leading, term, _trailing] -> term end)
  end

  # Parses one or more elements separated by `separator_parser` and yields the
  # list of elements (separators are dropped).
  defp separated_list(element_parser, separator_parser) do
    sequence([
      element_parser,
      many(sequence([separator_parser, element_parser]))
    ])
    |> map(fn [first_element, rest] ->
      other_elements = Enum.map(rest, fn [_separator, element] -> element end)
      [first_element | other_elements]
    end)
  end

  # Runs `parsers` one after another, each on what the previous one left over,
  # and yields the list of their terms. Fails as soon as any parser fails.
  defp sequence(parsers), do: fn input -> run_sequence(parsers, input) end

  defp run_sequence([], input), do: {:ok, [], input}

  defp run_sequence([parser | other_parsers], input) do
    with {:ok, term, rest} <- parser.(input),
         {:ok, other_terms, rest} <- run_sequence(other_parsers, rest) do
      {:ok, [term | other_terms], rest}
    end
  end

  # Transforms the term produced by `parser` with `mapper`; errors pass through.
  defp map(parser, mapper) do
    fn input ->
      with {:ok, term, rest} <- parser.(input),
           do: {:ok, mapper.(term), rest}
    end
  end

  # Parses a non-empty run of identifier characters and yields it as a string.
  defp identifier do
    identifier_char()
    |> many()
    |> satisfy(fn chars -> chars != [] end)
    |> map(fn chars -> to_string(chars) end)
  end

  # Comma-separated list of column names.
  defp columns, do: separated_list(token(identifier()), token(char(?,)))

  # Whitespace skipped between tokens. `\r` is included so that input with
  # Windows (CRLF) line endings parses as well.
  defp ignored_char, do: choice([char(?\s), char(?\n), char(?\t), char(?\r)])

  defp digit, do: satisfy(char(), fn char -> char in ?0..?9 end)

  defp ascii_letter, do: satisfy(char(), fn char -> char in ?A..?Z or char in ?a..?z end)

  # Parses exactly the codepoint `expected`.
  defp char(expected), do: satisfy(char(), fn char -> char == expected end)

  defp identifier_char, do: choice([ascii_letter(), char(?_), digit()])

  # Tries `parsers` in order on the same input and returns the first result
  # that is not an error.
  defp choice(parsers), do: fn input -> run_choice(parsers, input) end

  defp run_choice([], _input), do: {:error, "no parser succeeded"}

  defp run_choice([first_parser | other_parsers], input) do
    with {:error, _reason} <- first_parser.(input),
         do: run_choice(other_parsers, input)
  end

  # Applies `parser` zero or more times and yields the list of terms. Never
  # fails: when `parser` fails, the terms collected so far are returned.
  defp many(parser), do: fn input -> run_many(parser, input, []) end

  defp run_many(parser, input, terms) do
    case parser.(input) do
      {:error, _reason} ->
        {:ok, Enum.reverse(terms), input}

      # A parser that succeeds without consuming anything would succeed
      # forever; stop here instead of looping endlessly.
      {:ok, _term, ^input} ->
        {:ok, Enum.reverse(terms), input}

      {:ok, term, rest} ->
        run_many(parser, rest, [term | terms])
    end
  end

  # Runs `parser` and fails with "term rejected" unless `acceptor` returns a
  # truthy value for the parsed term.
  defp satisfy(parser, acceptor) do
    fn input ->
      with {:ok, term, rest} <- parser.(input) do
        if acceptor.(term),
          do: {:ok, term, rest},
          else: {:error, "term rejected"}
      end
    end
  end

  # Parses a single UTF-8 codepoint and yields it as an integer.
  defp char do
    fn
      <<char::utf8, rest::binary>> -> {:ok, char, rest}
      "" -> {:error, "unexpected end of input"}
      # Input that does not start with a valid UTF-8 codepoint is a parse
      # failure, not a crash.
      _invalid -> {:error, "invalid UTF-8 input"}
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment