Created
September 19, 2016 16:09
-
-
Save south37/62c477824ab0e8be06ac2b6d8518f6a6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'set' | |
class NFAStateFactory | |
class << self | |
def create_new(start:) | |
@label_counter ||= 0 | |
state = NFAState.new(@label_counter, start: start) | |
@label_counter += 1 | |
state | |
end | |
end | |
end | |
class NFAState | |
attr_accessor :start, :goal | |
attr_reader :label | |
def initialize(label, start: false) | |
@label = label | |
@start = start | |
@goal = false | |
end | |
def ==(other) | |
@label == other.label | |
end | |
def inspect | |
"<State:#{@label}#{if @start then ' start' end}#{if @goal then ' goal' end}>" | |
end | |
def to_s | |
inspect | |
end | |
end | |
class NFAEdge | |
attr_reader :from, :to, :token | |
def initialize(from, to, token) | |
@from = from | |
@to = to | |
@token = token | |
end | |
def inspect | |
"<Edge:#{@token} #{@from}-#{@to}>" | |
end | |
def to_s | |
inspect | |
end | |
end | |
module MetaToken | |
Epsilon = :Epsilon | |
end | |
class ParseError < StandardError; end | |
class NFA | |
attr_reader :states, :edges | |
def initialize(states, edges) | |
@states = states | |
@edges = edges | |
end | |
def edges_from(from) | |
@edges.select { |edge| edge.from == from } | |
end | |
def epsilon_edges_from(from) | |
edges_from(from).select { |edge| edge.token == MetaToken::Epsilon } | |
end | |
def goal_state | |
@states.find { |state| state.goal } | |
end | |
def start_state | |
@states.find { |state| state.start } | |
end | |
end | |
class NFACreator | |
attr_reader :states, :edges # Only for debug | |
def initialize(re_str) | |
start_state = NFAStateFactory.create_new(start: true) | |
@states = [start_state] | |
@edges = [] | |
@re_str = re_str | |
@tokens = StringIO.new(@re_str) | |
end | |
def add_normal(token) | |
from = @states.last | |
to = NFAStateFactory.create_new(start: false) | |
@states.push(to) | |
@edges.push(NFAEdge.new(from, to, token)) | |
end | |
def add_star(token) | |
if @states.length < 2 | |
raise ParseError.new("SyntaxError: target of repeat operator is not specified: /#{@re_str}/") | |
end | |
pre_last = @states[-2] | |
last = @states.last | |
@edges.push(NFAEdge.new(last, pre_last, MetaToken::Epsilon)) | |
@edges.push(NFAEdge.new(pre_last, last, MetaToken::Epsilon)) | |
end | |
def add_vertical_var(token) | |
left_start = @states.find { |state| state.start } | |
left_start.start = false | |
left_goal = @states.last | |
right_nfa = NFACreator.new(@tokens.gets).create | |
right_start = right_nfa.start_state | |
right_start.start = false | |
right_goal = right_nfa.goal_state | |
right_goal.goal = false | |
new_start = NFAStateFactory.create_new(start: true) | |
@edges.push(NFAEdge.new(new_start, left_start, MetaToken::Epsilon)) | |
@edges.push(NFAEdge.new(new_start, right_start, MetaToken::Epsilon)) | |
new_goal = NFAStateFactory.create_new(start: false) | |
@edges.push(NFAEdge.new(left_goal, new_goal, MetaToken::Epsilon)) | |
@edges.push(NFAEdge.new(right_goal, new_goal, MetaToken::Epsilon)) | |
@states += right_nfa.states | |
@states.push(new_start) | |
@states.push(new_goal) | |
@edges += right_nfa.edges | |
end | |
def create | |
while token = @tokens.getc | |
case token | |
when '|' | |
add_vertical_var(token) | |
when '*' | |
add_star(token) | |
else | |
add_normal(token) | |
end | |
end | |
@states.last.goal = true | |
NFA.new(@states, @edges) | |
end | |
end | |
class DFAState | |
attr_reader :nfa_states, :start, :goal | |
def initialize(nfa_states, nfa) | |
@nfa_states = Set.new(nfa_states) | |
@nfa = nfa | |
@start = false | |
@goal = false | |
expand_states | |
set_flags | |
end | |
def nfa_edges | |
@nfa_edges ||= nfa_states.to_a.map { |nfa_state| | |
@nfa.edges_from(nfa_state) | |
}.flatten.select { |edge| edge.token != MetaToken::Epsilon } | |
end | |
def ==(other) | |
@nfa_states == other.nfa_states | |
end | |
def inspect | |
"<State:{#{nfa_states.map(&:label).join(', ')}}#{if @start then ' start' end}#{if @goal then ' goal' end}>" | |
end | |
def to_s | |
inspect | |
end | |
private | |
def expand_states | |
changed = true | |
while changed | |
changed = false | |
@nfa_states.to_a.each do |nfa_state| | |
@nfa.epsilon_edges_from(nfa_state).each do |edge| | |
if !@nfa_states.include?(edge.to) | |
@nfa_states.add(edge.to) | |
changed = true | |
end | |
end | |
end | |
end | |
end | |
def set_flags | |
@nfa_states.each do |nfa_state| | |
@start = true if nfa_state.start | |
@goal = true if nfa_state.goal | |
end | |
end | |
end | |
class DFAEdge | |
attr_reader :from, :to, :token | |
def initialize(from, to, token) | |
@from = from | |
@to = to | |
@token = token | |
end | |
def ==(other) | |
@from == other.from && @to == other.to && @token == other.token | |
end | |
def inspect | |
"<Edge:#{@token} #{@from}-#{@to}>" | |
end | |
def to_s | |
inspect | |
end | |
end | |
class DFA | |
attr_reader :states, :edges | |
attr_writer :current_state | |
def initialize(states, edges) | |
@states = states | |
@edges = edges | |
@current_state = start_states.first | |
end | |
def goal? | |
@current_state.goal | |
end | |
def refresh | |
@current_state = start_states.first | |
end | |
def start_states | |
states.select { |state| state.start } | |
end | |
def translate_by(token) | |
next_state = @edges.find { |edge| edge.from == @current_state && edge.token == token }&.to | |
if next_state | |
@current_state = next_state | |
true | |
else | |
false | |
end | |
end | |
end | |
class DFAConverter | |
def initialize(nfa) | |
@nfa = nfa | |
start_state = DFAState.new([@nfa.start_state], @nfa) | |
@states = [start_state] | |
@edges = [] | |
@search_queue = [start_state] | |
end | |
def convert | |
while @search_queue.length > 0 | |
add_state | |
end | |
DFA.new(@states, @edges) | |
end | |
private | |
def add_state | |
from = @search_queue.shift | |
from.nfa_edges.group_by { |edge| edge.token }.map do |token, edges| | |
to = DFAState.new(edges.map(&:to), @nfa) | |
if [email protected]?(to) | |
@states.push(to) | |
@search_queue.push(to) | |
else | |
to = @states.select { |state| state == to }.first | |
end | |
edge = DFAEdge.new(from, to, token) | |
@edges.push(edge) unless @edges.include?(edge) | |
end | |
end | |
end | |
class ReEngine | |
attr_reader :nfa, :dfa | |
def initialize(re_str) | |
@nfa = NFACreator.new(re_str).create | |
@dfa = DFAConverter.new(nfa).convert | |
end | |
def match(str) | |
result = _match(str) | |
@dfa.refresh | |
result | |
end | |
private | |
def _match(str) | |
@dfa.start_states.each do |start_state| | |
@dfa.current_state = start_state | |
tokenize(str).each do |token| | |
return false if [email protected]_by(token) | |
end | |
return true if @dfa.goal? | |
end | |
false | |
end | |
def tokenize(str) | |
StringIO.new(str).each_char | |
end | |
end | |
re = ReEngine.new('b*a*bcd*') | |
p re.nfa.states | |
p re.nfa.edges | |
p re.dfa.states | |
p re.dfa.edges | |
p re.match('ab') | |
p re.match('acdd') | |
p re.match('abcdd') | |
re2 = ReEngine.new('abc|de') | |
p re2.nfa.states | |
p re2.nfa.edges | |
p re2.dfa.states | |
p re2.dfa.edges | |
p re2.match('ab') | |
p re2.match('abc') | |
p re2.match('d') | |
p re2.match('de') | |
# >> [<State:0 start>, <State:1>, <State:2>, <State:3>, <State:4>, <State:5 goal>] | |
# >> [<Edge:b <State:0 start>-<State:1>>, <Edge:Epsilon <State:1>-<State:0 start>>, <Edge:Epsilon <State:0 start>-<State:1>>, <Edge:a <State:1>-<State:2>>, <Edge:Epsilon <State:2>-<State:1>>, <Edge:Epsilon <State:1>-<State:2>>, <Edge:b <State:2>-<State:3>>, <Edge:c <State:3>-<State:4>>, <Edge:d <State:4>-<State:5 goal>>, <Edge:Epsilon <State:5 goal>-<State:4>>, <Edge:Epsilon <State:4>-<State:5 goal>>] | |
# >> [<State:{0, 1, 2} start>, <State:{1, 3, 0, 2} start>, <State:{4, 5} goal>] | |
# >> [<Edge:b <State:{0, 1, 2} start>-<State:{1, 3, 0, 2} start>>, <Edge:a <State:{0, 1, 2} start>-<State:{0, 1, 2} start>>, <Edge:a <State:{1, 3, 0, 2} start>-<State:{0, 1, 2} start>>, <Edge:c <State:{1, 3, 0, 2} start>-<State:{4, 5} goal>>, <Edge:b <State:{1, 3, 0, 2} start>-<State:{1, 3, 0, 2} start>>, <Edge:d <State:{4, 5} goal>-<State:{4, 5} goal>>] | |
# >> false | |
# >> false | |
# >> true | |
# >> [<State:6>, <State:7>, <State:8>, <State:9>, <State:10>, <State:11>, <State:12>, <State:13 start>, <State:14 goal>] | |
# >> [<Edge:a <State:6>-<State:7>>, <Edge:b <State:7>-<State:8>>, <Edge:c <State:8>-<State:9>>, <Edge:Epsilon <State:13 start>-<State:6>>, <Edge:Epsilon <State:13 start>-<State:10>>, <Edge:Epsilon <State:9>-<State:14 goal>>, <Edge:Epsilon <State:12>-<State:14 goal>>, <Edge:d <State:10>-<State:11>>, <Edge:e <State:11>-<State:12>>] | |
# >> [<State:{13, 6, 10} start>, <State:{7}>, <State:{11}>, <State:{8}>, <State:{12, 14} goal>, <State:{9, 14} goal>] | |
# >> [<Edge:a <State:{13, 6, 10} start>-<State:{7}>>, <Edge:d <State:{13, 6, 10} start>-<State:{11}>>, <Edge:b <State:{7}>-<State:{8}>>, <Edge:e <State:{11}>-<State:{12, 14} goal>>, <Edge:c <State:{8}>-<State:{9, 14} goal>>] | |
# >> false | |
# >> true | |
# >> false | |
# >> true |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment