Last active
October 18, 2017 04:59
-
-
Save magical/152d5041fa190b8afec4c2a3d88f9170 to your computer and use it in GitHub Desktop.
NFA -> Regular Expression conversion in OCaml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type node = { accept: bool; links: link list } | |
and link = Link of char * node | |
type nfa_type = NFA of node list * node | |
type regexp = | |
| Empty | |
| Epsilon | |
| Single of char | |
| Union of regexp * regexp | |
| Concat of regexp * regexp | |
| Star of regexp | |
let rec tostring = function | |
| Empty -> "<empty>" | |
| Epsilon -> "ε" | |
| Single c -> String.make 1 c | |
| Star r -> parens r ^ "*" | |
| Concat(r,Concat(s,t)) -> parens r ^ tostring(Concat(s,t)) | |
| Concat(r, s) -> parens r ^ parens s | |
| Union(r, s) -> parens r ^ "+" ^ parens s | |
and parens = function | |
| Empty -> "<empty>" | |
| Epsilon -> "ε" | |
| Single(c) -> tostring(Single(c)) | |
| Star(r) -> tostring(Star(r)) | |
| r -> "(" ^ tostring r ^ ")" | |
let rec concat = function | |
| (_, Empty) -> Empty | |
| (Empty, _) -> Empty | |
| (r, Epsilon) -> r | |
| (Epsilon, s) -> s | |
| (Concat(r,s),t) -> Concat(r,concat(s,t)) | |
| (r,s) -> Concat(r,s) | |
let union = function | |
| (r, Empty) -> r | |
| (Empty,s) -> s | |
| (r,s) -> Union(r,s) | |
let star = function | |
| Empty -> Epsilon | |
| Epsilon -> Epsilon | |
| r -> Star(r) | |
(* For debugging... *) | |
let print_edges = | |
Hashtbl.iter (fun k r -> | |
let (i,j) = k in | |
Printf.printf "%d,%d %s\n" i j (tostring r); | |
) | |
let print_nodemap = | |
Hashtbl.iter (fun k v -> Printf.printf "%d\n" v) | |
let get_edge h k = | |
if Hashtbl.mem h k | |
then Hashtbl.find h k | |
else Empty | |
let add_edge h k r = | |
Hashtbl.replace h k ( | |
if Hashtbl.mem h k | |
then union(Hashtbl.find h k, r) | |
else r | |
) | |
let toregexp nfa = | |
let NFA(nodes, start) = nfa in | |
let n = List.length nodes in | |
let edges = Hashtbl.create n in | |
let nodemap = Hashtbl.create n in | |
ListLabels.iteri nodes ~f:(fun i q -> | |
Hashtbl.add nodemap q (i+3); | |
); | |
add_edge edges (1,Hashtbl.find nodemap start) Epsilon; | |
ListLabels.iteri nodes ~f:(fun i q -> | |
ListLabels.iter q.links ~f:(function Link(c, a) -> | |
let j = Hashtbl.find nodemap a in | |
add_edge edges (i+3,j) (Single c); | |
); | |
if q.accept then | |
add_edge edges (i+3,2) Epsilon | |
; | |
); | |
(*print_edges edges; | |
print_string "-\n";*) | |
for q = 2+n downto 3 do | |
let self = get_edge edges (q,q) in | |
for i = 1 to q-1 do | |
for j = 1 to q-1 do | |
let a = get_edge edges (i,q) in | |
let b = get_edge edges (q,j) in | |
if a != Empty && b != Empty then | |
(*Printf.printf "%d,%d,%d: %s,%s,%s\n" i q j (tostring a) (tostring self) (tostring b);*) | |
let r = concat(a, concat(star self, b)) in | |
add_edge edges (i,j) r; | |
done; | |
done; | |
for i = 1 to q-1 do | |
Hashtbl.remove edges (i,q); | |
Hashtbl.remove edges (q,i); | |
done; | |
Hashtbl.remove edges (q,q) | |
done; | |
(*print_endline "-"; | |
print_edges edges;*) | |
get_edge edges (1,2) | |
let main = | |
(* a* *) | |
(*let rec n1 = { accept=true; links=[Link('a', n1)] } in | |
let nfa = NFA([n1], n1) in*) | |
(* example from class *) | |
(*let rec n1 = { accept=true; links=[Link('a', n2)] } | |
and n2 = { accept=false; links=[Link('a', n2); Link('b', n3)] } | |
and n3 = { accept=false; links=[Link('a', n2); Link('b', n1)] } in | |
let nfa = NFA([n1; n3; n2], n1) in*) | |
(* even number of as and bs *) | |
(* let rec n1 = { accept=true; links=[Link('a', n2); Link('b', n3)] } | |
and n2 = { accept=false; links=[Link('a', n1); Link('b', n4)] } | |
and n3 = { accept=false; links=[Link('a', n4); Link('b', n1)] } | |
and n4 = { accept=false; links=[Link('a', n3); Link('b', n2)] } in | |
let nfa = NFA([n1; n2; n3; n4], n1) in *) | |
(* problem 3 *) | |
let rec n1 = { accept=false; links=[Link('a', n2); Link('a', n3)] } | |
and n2 = { accept=true; links=[Link('a', n2); Link('b', n1)] } | |
and n3 = { accept=true; links=[Link('a', n3); Link('a', n2)] } in | |
let nfa = NFA([n1; n2; n3], n1) in | |
let r = toregexp nfa in | |
print_string (tostring r) | |
;; | |
main | |
(* :nmap <cr> :w \| !ocaml %<cr> *) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment