Created
April 20, 2017 10:51
-
-
Save neel-krishnaswami/1cb3f33ea46ca056310ac1361af11d30 to your computer and use it in GitHub Desktop.
Typed algebraic parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* Types *) | |
module C = Set.Make(Char) | |
type tp = { null : bool; first : C.t; follow : C.t } | |
(* Grammars *) | |
type t = | |
| Var of string | |
| Fix of string * tp * t | |
| Eps | |
| Char of char | |
| Cat of t * t | |
| Bot | |
| Alt of t * t | |
| Nonnull of t | |
(* Type operators *) | |
let tp_eps = { null = true; first = C.empty; follow = C.empty } | |
let tp_bot = { null = false; first = C.empty; follow = C.empty } | |
let tp_char c = { null = false; first = C.singleton c; follow = C.empty } | |
let tp_alt tp tp' = { | |
null = tp.null || tp'.null ; | |
first = C.union tp.first tp'.first; | |
follow = C.union tp.follow tp'.follow } | |
let tp_seq tp tp' = { | |
null = tp.null && tp'.null; | |
first = tp.first; | |
follow = C.union tp'.follow (if tp'.null then tp.follow else C.empty) } | |
let tp_nonnull tp = { tp with null = false } | |
let apart tp tp' = not (tp.null && tp'.null) | |
&& C.is_empty (C.inter tp.first tp'.first) | |
let seq tp tp' = not tp.null && C.is_empty (C.inter tp.follow tp'.first) | |
(* Contexts and typechecking. We use a unified context with visibility | |
markers on hypotheses rather than two zones. *) | |
type hyp = { tp : tp; visible : bool } | |
type ctx = (string * hyp) list | |
let lookup x ctx = let h = List.assoc x ctx in | |
if h.visible then h.tp else raise Not_found | |
let make_visible ctx = List.map (fun (x,h) -> (x, {h with visible = true})) ctx | |
let rec typecheck ctx = function | |
| Var x -> lookup x ctx | |
| Fix (x,tp,g) -> typecheck ((x, {tp = tp; visible = false}) :: ctx) g | |
| Eps -> tp_eps | |
| Char c -> tp_char c | |
| Cat (g1, g2) -> let tp1 = typecheck ctx g1 in | |
let tp2 = typecheck (make_visible ctx) g2 in | |
if seq tp1 tp2 then | |
tp_seq tp1 tp2 | |
else | |
failwith "Ambiguous sequential composition" | |
| Bot -> tp_bot | |
| Alt (g1, g2) -> let tp1 = typecheck ctx g1 in | |
let tp2 = typecheck ctx g2 in | |
if apart tp1 tp2 then | |
tp_alt tp1 tp2 | |
else | |
failwith "Alternatives not disjoint" | |
| Nonnull g -> tp_nonnull (typecheck ctx g) | |
(* Substitution. We don't care about renaming because we only substitute values. *) | |
let rec subst g' x = function | |
| Var _ -> assert false | |
| Fix (y,tp,g) when x = y -> Fix (y,tp,g) | |
| Fix (y,tp,g) -> Fix (y,tp,subst g' x g) | |
| Eps -> Eps | |
| Char c -> Char c | |
| Cat (g1, g2) -> Cat(subst g' x g1, subst g' x g2) | |
| Bot -> Bot | |
| Alt (g1, g2) -> Alt(subst g' x g1, subst g' x g2) | |
| Nonnull g -> Nonnull (subst g' x g) | |
let cat g1 g2 = if g1 = Bot || g2 = Bot then Bot else Cat(g1, g2) | |
let alt g1 g2 = if g1 = Bot then g2 else if g2 = Bot then g1 else Alt(g1, g2) | |
(* Null checking and derivatives. Again we only care about closed terms. *) | |
let rec isnull = function | |
| Var x -> assert false | |
| Fix (y,tp,g) -> isnull g | |
| Eps -> true | |
| Alt (g1, g2) -> isnull g1 || isnull g2 | |
| _ -> false | |
let rec deriv c = function | |
| Var _ -> assert false | |
| Fix (x,tp,g) -> subst (Fix (x,tp,g)) x (deriv c g) | |
| Eps -> Bot | |
| Char c' -> if c = c' then Eps else Bot | |
| Cat (g1,g2) -> let g1' = deriv c g1 in | |
if isnull g1' then | |
alt (cat (Nonnull g1') g2) g2 | |
else | |
cat g1' g2 | |
| Bot -> Bot | |
| Alt (g1, g2) -> alt (deriv c g1) (deriv c g2) | |
| Nonnull g -> deriv c g |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment