Created
September 29, 2018 19:39
-
-
Save talyian/b0c3fa9f84c6b2d6e83a69bc2fbed54c to your computer and use it in GitHub Desktop.
Convert a Regex to NFA to DFA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// An exercise in converting a regex to an NFA to a DFA | |
type RegexNode = | |
| Or of RegexNode list | |
| Seq of RegexNode List | |
| Star of RegexNode | |
| T of string | |
| Eta (* todo: does this value really belong here? *) | |
let regex_pattern = "x(x|y)*|z" | |
let regex = Or [ Seq [ T "x"; Star (Or [T "x"; T"y"])]; T "z"] | |
type Node = { n: int32 } | |
type 't Graph = { | |
nodes: ResizeArray<int32> | |
names: ResizeArray<string> | |
edges: ResizeArray<(int32 * int32 * 't)> | |
} | |
with | |
override __.ToString() = | |
let out = new System.IO.StringWriter() | |
let edges_by_first = __.edges |> Seq.groupBy (fun (a, b, c) -> a) | |
fprintf out "--- Terms: " | |
for x in __.names do fprintf out "[%O], " x | |
fprintfn out "" | |
for a, group in edges_by_first do | |
fprintfn out "[%s]" (__.names.[a]) | |
for (a, b, c) in group do | |
fprintfn out " [%O] : %O" (__.names.[b]) c | |
out.ToString() | |
let NfaFromRegex (regex:RegexNode) = | |
let nodes = new ResizeArray<int32>() | |
let names = new ResizeArray<string>() | |
let edges = new ResizeArray<(int32 * int32 * RegexNode)>() | |
let newstate (name:string) = | |
let state = nodes.Count | |
names.Add name | |
nodes.Add state; {n=state} | |
let remove_edge_at i = | |
edges.[i] <- edges.[edges.Count - 1] | |
edges.RemoveAt(edges.Count - 1) | |
let a = newstate "start" | |
let b = newstate "end" | |
edges.Add(a.n, b.n, regex) | |
let mutable i = 0 | |
while i < edges.Count do | |
match edges.[i] with | |
| (a, b, Or items) -> | |
remove_edge_at i | |
for x in items do | |
edges.Add(a, b, x) | |
| (a, b, Seq []) -> remove_edge_at i | |
| (a, b, Seq [x]) -> edges.[i] <- (a, b, x) | |
| (a, b, Seq items) -> | |
remove_edge_at i | |
for j in 1..items.Length - 1 do | |
let midstate = newstate(string nodes.Count) | |
edges.Add(a, midstate.n, items.[j-1]) | |
edges.Add(midstate.n, b, items.[j]) | |
| (a, b, Star inner) -> | |
remove_edge_at i | |
let start_inner = newstate(string nodes.Count) | |
let stop_inner = newstate(string nodes.Count) | |
edges.Add(a, start_inner.n, Eta) | |
edges.Add(a, b, Eta) | |
edges.Add(start_inner.n, stop_inner.n, inner) | |
edges.Add(stop_inner.n, b, Eta) | |
edges.Add(stop_inner.n, start_inner.n, Eta) | |
| _ -> i <- i + 1 | |
{edges=edges;nodes=nodes;names=names} | |
let rec closure func starting_set = | |
let result_set = Seq.fold (fun s item -> Set.union (func item) s) Set.empty starting_set | |
if result_set = | |
starting_set then result_set | |
else | |
closure func result_set | |
let DfaFromNfa (nfa:'t Graph) (eta: 't) = | |
let eta_transitions = nfa.edges |> Seq.filter (fun (_from, _to, _val) -> _val = eta) | |
let self_transitions = nfa.nodes |> Seq.map (fun i -> (i, i, eta)) | |
let transitions = Seq.concat [eta_transitions; self_transitions] |> Seq.toList | |
let etas_by_source = | |
transitions | |
|> Seq.groupBy (fun (a,b,c) -> a) | |
|> Seq.map (fun (src, items) -> src, Seq.map (fun (a,b,c) -> b) items |> Set.ofSeq) | |
|> List.ofSeq | |
let eta_closure (src, dests) = | |
let links d = etas_by_source |> List.find (fun (a, b) -> a = src) |> snd | |
src, closure links dests | |
let etas = List.map eta_closure etas_by_source | |
let transitions = | |
nfa.edges | |
|> Seq.filter (fun (_from, _to, _val) -> _val <> eta) | |
|> Seq.groupBy (fun (_from, _to, _val) -> _from) | |
|> Seq.map (fun (a, vals) -> a, (vals |> Seq.map (fun (a, b, c) -> c, b) |> List.ofSeq)) | |
|> List.ofSeq | |
let get_transitions nfa_state = | |
transitions |> List.collect (fun (a, rest) -> if Set.contains a nfa_state then rest else []) | |
let dfaStates = ResizeArray<Set<int32>>() | |
let dfaEdges = ResizeArray<(int32 * int32 * 't)>() | |
let get_dfa_state item = | |
match dfaStates.FindIndex (fun a -> a = item) with | |
| -1 -> | |
dfaStates.Add item | |
dfaStates.Count - 1 | |
| n -> n | |
dfaStates.Add(set [0]) | |
let mutable i = 0 | |
while i < dfaStates.Count do | |
for sym, nfa_state in get_transitions dfaStates.[i] do | |
let eta_closure = | |
List.pick (fun (b, e) -> if b = nfa_state then Some e else None) etas | |
let dfa_b = get_dfa_state eta_closure | |
dfaEdges.Add(i, dfa_b, sym) | |
i <- i + 1 | |
{ Graph.edges=dfaEdges | |
Graph.nodes=ResizeArray([0..dfaStates.Count-1]) | |
Graph.names= dfaStates.ConvertAll(fun x -> (string x).[4..].Trim('[', ']'))} | |
let nfa = NfaFromRegex regex | |
let dfa = DfaFromNfa nfa Eta | |
printfn "%O" nfa | |
printfn "%O" dfa |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment