Created
October 14, 2014 09:07
-
-
Save y-yu/f65cd4b36f08a540a0f8 to your computer and use it in GitHub Desktop.
implement to convert REs to aNFA and parse the RE greedy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type alphabet = | |
Char of char | |
| Choice of bit | |
| Log of bit | |
and | |
bit = bool | |
let one = true | |
let zero = false | |
type reg = | |
Lit of char | |
| Alt of reg * reg | |
| Con of reg * reg | |
| Star of reg | |
let to_anfa reg = | |
let counter = ref 0 in | |
let mk_state () = | |
counter := !counter + 1; | |
!counter | |
in | |
let rec loop s g = function | |
Lit c -> [(s, g, (Char c))] | |
| Alt (p, q) -> | |
let n1 = mk_state () in | |
let n1' = mk_state () in | |
let n2 = mk_state () in | |
let n2' = mk_state () in | |
[(s, n1, (Choice zero)); (s, n2, (Choice one)); | |
(n1', g, (Log zero)); (n2', g, (Log one))] @ (loop n1 n1' p) @ (loop n2 n2' q) | |
| Con (p, q) -> | |
let n = mk_state () in | |
(loop s n p) @ (loop n g q) | |
| Star p -> | |
let n1 = mk_state () in | |
let n2 = mk_state () in | |
let n3 = mk_state () in | |
[(s, n1, (Log one)); (n1, n2, (Choice zero)); | |
(n3, n1, (Log zero)); (n1, g, (Choice one))] @ (loop n2 n3 p) | |
in | |
let s = mk_state () in | |
let g = mk_state () in | |
(s, g, (loop s g reg)) | |
let rec forward s sym = function | |
(s', g, sym')::ds -> if s = s' && sym = sym' then g | |
else forward s sym ds | |
| [] -> -1 | |
let union a b = | |
let rec loop a' = function | |
x::xs -> if List.mem x a then loop a' xs | |
else loop (x::a') xs | |
| [] -> a' | |
in | |
a @ (loop [] b) | |
let eclose ?log:(l = ([], [])) s d = | |
let log = ref (fst l, snd l) in | |
let rec loop s' p = function | |
(s'', g, syd)::xs when s'' = s' -> | |
if not (List.mem g p) then | |
(match syd with | |
Choice b -> | |
if b = zero then | |
let n = loop g (g::p) d in | |
union n (loop s' p xs) | |
else | |
loop g (g::p) d | |
| Log b -> | |
let (l, p) = !log in | |
if not (List.mem g p) then log := (b::l, g::p); | |
loop g p d | |
| Char _ -> [s']) | |
else | |
[] | |
| _::xs -> loop s' p xs | |
| [] -> [s'] | |
in | |
let e = s::(loop s [] d) in | |
(e, !log) | |
let string_of_sym = function | |
Char c -> "Char " ^ (String.make 1 c) | |
| Choice b -> if b = one then "Choice 1" else "Choice 0" | |
| Log b -> if b = one then "Log 1" else "Log 0" | |
let rec map_pp = function | |
(s, g, sym)::xs -> Printf.printf "%d : (%s) -> %d\n" s (string_of_sym sym) g; | |
map_pp xs | |
| [] -> () | |
let list_pp l = | |
List.iter (fun x -> Printf.printf "%d " x) l; | |
print_string "\n" | |
let log_pp l = List.iter (fun x -> if x then print_string "1 " | |
else print_string "0 ") l | |
let log_list_pp l = | |
List.iter (fun x -> log_pp x; print_string "\n") l; | |
print_string "########\n" | |
let accept reg = | |
let (s, g, d) = to_anfa reg in | |
map_pp d; | |
let one_step : char -> int list -> (int list * bit list) = fun c s -> | |
let (n, (l, _)) = List.fold_left (fun x y -> | |
let (s, l) = x in | |
let (s', l') = eclose ~log:(l) y d in | |
((union s s'), l') ) | |
([], ([], [])) (List.map (fun x -> forward x (Char c) d) s) | |
in | |
(n, l) | |
in | |
let log = ref [] in | |
let rec step s = function | |
c::cs -> | |
let (s', l) = one_step c s in | |
log := l :: !log; | |
step s' cs | |
| [] -> List.mem g s | |
in | |
fun str -> | |
let (s', (l, _)) = eclose s d in | |
log := l :: !log; | |
let t = step s' str in | |
(t, !log, (s, g, d)) | |
let backward g log d = | |
let rec loop g log = function | |
(s, g', sym)::ds when g = g' -> | |
(match sym with | |
Log b when (List.hd log) = b -> | |
loop s (List.tl log) d | |
| Choice b -> | |
let (s', b') = loop s log d in | |
(s', b::b') | |
| Char _ -> (s, []) | |
| _ -> loop g log ds) | |
| _::ds -> loop g log ds | |
| [] -> (g, []) | |
in | |
loop g log d | |
let rec parse reg = | |
let accept' = accept reg in | |
fun str -> | |
let (b, log, (s, g, d)) = accept' str in | |
let rec loop g str log = | |
match (str, log) with | |
(_::cs, l::ls) -> | |
let (g', b') = backward g l d in | |
b' @ (loop g' cs ls) | |
| ([], l::[]) -> | |
log_pp l; | |
let (g', b') = backward g l d in b' | |
| _ -> failwith "parse error" | |
in | |
log_list_pp log; | |
loop g str log |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment