Skip to content

Instantly share code, notes, and snippets.

@ichiban
Created January 16, 2016 09:19
Show Gist options
  • Save ichiban/79bb08216fa2ba89904c to your computer and use it in GitHub Desktop.
Save ichiban/79bb08216fa2ba89904c to your computer and use it in GitHub Desktop.
Earley Recognizer
open Batteries
(**** symbols ****)
type symbol =
| Terminal of UChar.t
| Nonterminal of Text.t
let symbol_to_text = function
| Terminal char -> Text.of_uchar char
| Nonterminal label ->
let l, r = Text.of_string "<", Text.of_string ">" in
Text.join Text.empty [l; label; r]
let is_nonterminal = function
| Terminal _ -> false
| Nonterminal _ -> true
let char = function
| Terminal char -> char
| Nonterminal _ -> undefined ()
(**** rules ****)
type 'a rule =
{
label : Text.t;
symbols : symbol list;
action : 'a Enum.t -> 'a
}
let rule_to_text ?dot_at rule =
let dot = Text.of_string "●" in
let is_dot i = match dot_at with None -> false | Some j -> i = j in
let maybe_dot i = if is_dot i then [dot] else [] in
let rec symbols i = function
| [] -> maybe_dot i
| hd :: tl -> maybe_dot i @ symbol_to_text hd :: symbols (i + 1) tl
in
let space = Text.of_string " " in
let l, r = Text.of_string "<", Text.of_string ">" in
let label rule = Text.join Text.empty [l; rule.label; r] in
let arrow = Text.of_string "->" in
let symbols_and_dot = Text.join space (symbols 0 rule.symbols) in
Text.join space [label rule; arrow; symbols_and_dot]
type 'a t = 'a rule list
type 'a state =
{
rule : 'a rule;
current : int;
origin : int;
data : 'a list;
}
let state_to_text state =
let rule = rule_to_text state.rule ~dot_at:state.current in
let separator = Text.of_string ", " in
let origin = state.origin |> String.of_int |> Text.of_string in
Text.join separator [rule; origin]
let is_complete state =
List.length state.rule.symbols = state.current
let is_completed_by state = function
| Terminal _ -> false
| Nonterminal label -> state.rule.label = label
let next_symbol state =
let symbols = state.rule.symbols in
if List.length symbols > state.current then
Some (List.at state.rule.symbols state.current)
else
None
let grammar_rules_for grammar = function
| Terminal _ -> Enum.empty ()
| Nonterminal label ->
let same_label rule = (label = rule.label) in
List.enum grammar |> filter same_label
(* chart *)
let initial_states start grammar =
let start_label rule = start = rule.label in
let rule_to_state rule =
{
rule = rule;
current = 0;
origin = 0;
data = [];
}
in
List.enum grammar
|> filter start_label
|> map rule_to_state
|> Set.of_enum
let initial_chart start length grammar =
let chart = Array.make (length + 1) (Set.empty, Set.empty) in
let unprocessed = initial_states start grammar in
let processed = Set.empty in
Array.set chart 0 (unprocessed, processed);
chart
let add_all_to_set chart index states =
let unprocessed, processed = Array.get chart index in
let unprocessed = Set.diff (Set.union states unprocessed) processed in
Array.set chart index (unprocessed, processed)
let add_to_set chart index state =
add_all_to_set chart index (Set.singleton state)
let pop_state chart index =
let unprocessed, processed = Array.get chart index in
if Set.is_empty unprocessed then
None
else
begin
let state, rest = Set.pop unprocessed in
let added = Set.add state processed in
Array.set chart index (rest, added);
Some state
end
let nullable_symbols grammar =
let rec search known =
let is_nullable rule =
match rule.symbols with
| [] -> true
| [x] -> Set.mem x known
| _ -> false
in
let symbol rule = Nonterminal rule.label in
let found =
List.enum grammar |> filter is_nullable |> map symbol |> Set.of_enum
in
if Set.equal found known then
known
else
search (Set.union known found)
in
search Set.empty
(* the actual algorithm *)
let predict chart state j grammar nullables =
let next_symbol = next_symbol state |> Option.get in
let rules = grammar_rules_for grammar next_symbol in
let rule_to_state rule =
{
rule = rule;
current = 0;
origin = j;
data = [];
} in
let new_states = rules |> map rule_to_state |> Set.of_enum in
let is_nullable symbol = Set.mem symbol nullables in
let magical_completion = {state with current = state.current + 1 } in
add_all_to_set chart j new_states;
if is_nullable next_symbol then
add_to_set chart j magical_completion
let scan input chart state j =
let char = next_symbol state |> Option.get |> char in
let new_state = {
state with
current = state.current + 1;
} in
if j < Text.length input && Text.get input j = char then
add_to_set chart (j + 1) new_state
let complete chart state k =
let next_state state = { state with current = state.current + 1 } in
let _, processed = Array.get chart state.origin in
processed
|> Set.filter (next_symbol %> Option.is_some)
|> Set.filter (next_symbol %> Option.get %> is_completed_by state)
|> Set.map next_state
|> add_all_to_set chart k
let empty = []
let rule label symbols action grammar =
{label = label; symbols = symbols; action = action} :: grammar
let parse input start grammar =
let length = Text.length input in
let chart = initial_chart start length grammar in
let nullables = nullable_symbols grammar in
let rec process_states i =
let process state =
if is_complete state then
complete chart state i
else if next_symbol state |> Option.get |> is_nonterminal then
predict chart state i grammar nullables
else
scan input chart state i;
process_states i
in
Option.may process (pop_state chart i)
in
foreach (0 -- length) process_states;
let print_state_set i =
Printf.printf "S(%d): %s\n" i (Text.left input i |> Text.to_string);
let print_state state =
Printf.printf "%s\n%!" (state_to_text state |> Text.to_string)
in
foreach (match Array.get chart i with _, p -> Set.enum p) print_state
in
foreach (0 -- length) print_state_set
open Batteries
type 'a t
type symbol =
| Terminal of UChar.t
| Nonterminal of Text.t
val empty : 'a t
val rule : Text.t -> symbol list -> ('a list -> 'a) -> 'a t -> 'a t
val parse : Text.t -> Text.t -> 'a t -> unit
open Batteries
let () =
let term char = Grammar.Terminal (UChar.of_char char) in
let nonterm label = Grammar.Nonterminal (Text.of_string label) in
let rule label symbols = Grammar.rule (Text.of_string label) symbols ignore in
let parse string label =
Grammar.parse (Text.of_string string) (Text.of_string label)
in
Grammar.empty
|> rule "Sum" [nonterm "Sum"; term '+'; nonterm "Product"]
|> rule "Sum" [nonterm "Sum"; term '-'; nonterm "Product"]
|> rule "Sum" [nonterm "Product"]
|> rule "Product" [nonterm "Product"; term '*'; nonterm "Factor"]
|> rule "Product" [nonterm "Product"; term '/'; nonterm "Factor"]
|> rule "Product" [nonterm "Factor"]
|> rule "Factor" [term '('; nonterm "Sum"; term ')']
|> rule "Factor" [nonterm "Number"]
|> rule "Number" [term '0'; nonterm "Number"]
|> rule "Number" [term '1'; nonterm "Number"]
|> rule "Number" [term '2'; nonterm "Number"]
|> rule "Number" [term '3'; nonterm "Number"]
|> rule "Number" [term '4'; nonterm "Number"]
|> rule "Number" [term '5'; nonterm "Number"]
|> rule "Number" [term '6'; nonterm "Number"]
|> rule "Number" [term '7'; nonterm "Number"]
|> rule "Number" [term '8'; nonterm "Number"]
|> rule "Number" [term '9'; nonterm "Number"]
|> rule "Number" [term '0']
|> rule "Number" [term '1']
|> rule "Number" [term '2']
|> rule "Number" [term '3']
|> rule "Number" [term '4']
|> rule "Number" [term '5']
|> rule "Number" [term '6']
|> rule "Number" [term '7']
|> rule "Number" [term '8']
|> rule "Number" [term '9']
|> parse "1+(2*3-4)" "Sum"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment