Last active
December 15, 2015 21:39
-
-
Save dmitry-vsl/5327209 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SimpleRegex | |
def parse(s) | |
@s = s | |
@s_idx = 0 | |
parsetree = parse_alternation [nil] | |
@states = [] | |
build_NFA parsetree | |
end | |
def matches(string) | |
reset | |
activate_state (@states.find {|s| s[:initial]}) | |
string.each_char do |char| | |
make_transition char | |
end | |
return @states.any? {|s| s[:finish] && s[:next]} | |
end | |
private | |
#parsing | |
def nextchar? | |
@s[@s_idx] | |
end | |
def nextchar! | |
res = @s[@s_idx] | |
@s_idx = @s_idx + 1 | |
return res | |
end | |
def expect(chars) | |
if !(chars.include? nextchar?) then | |
raise_unexpected | |
end | |
end | |
def raise_unexpected | |
if nextchar?.nil? then | |
raise "Unexpected end of string" | |
else | |
raise "Unexpected char #{nextchar?} at position #{@s_idx}" | |
end | |
end | |
def parse_alternation(expected_finish) | |
alternatives = [] | |
begin | |
alternatives.push(parse_seq(expected_finish+['|'])) | |
end while nextchar! == '|' | |
return alternatives | |
end | |
def parse_seq(expected_finish) | |
seq = [] | |
while !(expected_finish.include? nextchar?) do | |
seq.push parse_quantified_matcher | |
end | |
return seq | |
end | |
def parse_quantified_matcher | |
matcher = parse_matcher | |
if ['+','*'].include?(nextchar?) | |
return {:matcher=>matcher,:q=>nextchar!} | |
else | |
return {:matcher=>matcher,:q=>nil} | |
end | |
end | |
def parse_matcher | |
if nextchar? == '(' | |
nextchar! | |
return parse_alternation [')'] | |
elsif ('a'...'z') === nextchar? | |
return nextchar! | |
else | |
raise_unexpected | |
end | |
end | |
#build NFA | |
def add_state | |
state = {:initial=>false,:finish=>false,:transitions=>[]} | |
@states.push state | |
state | |
end | |
def build_NFA(parsetree) | |
finishstate = add_state | |
finishstate[:finish] = true | |
initialstate = build_alternation parsetree, finishstate | |
initialstate[:initial] = true | |
end | |
def build_alternation(alternatives, finishstate) | |
if alternatives.size == 1 | |
return build_seq alternatives[0], finishstate | |
end | |
fork = add_state | |
alt_entries = alternatives.collect{|alt| build_seq alt, finishstate} | |
fork[:transitions] = alt_entries.collect do |alt_entry| | |
{:char => nil,:to => alt_entry} | |
end | |
return fork | |
end | |
def build_seq(seq, finishstate) | |
result = finishstate | |
seq.reverse.each do |matcher| | |
matcher_entry = build_quantified_matcher matcher, result | |
result = matcher_entry | |
end | |
return result | |
end | |
def build_quantified_matcher(matcher,finishstate) | |
if matcher[:q] == '+' | |
return build_plus_matcher matcher[:matcher], finishstate | |
elsif matcher[:q] == '*' | |
result = build_plus_matcher matcher[:matcher], finishstate | |
result[:transitions].push({:char=>nil,:to=>finishstate}) | |
return result | |
elsif matcher[:q] == nil | |
return build_matcher(matcher[:matcher],finishstate) | |
else | |
raise 'Unknown quantifier ' + matcher[:q] | |
end | |
end | |
def build_plus_matcher(matcher,finishstate) | |
repeater = add_state | |
result = build_matcher matcher, repeater | |
repeater[:transitions] = [ | |
{:char=>nil,:to=>finishstate}, | |
{:char=>nil,:to=>result} | |
] | |
return result | |
end | |
def build_matcher(matcher,finishstate) | |
if matcher.kind_of? Array | |
return build_alternation matcher,finishstate | |
else | |
result = add_state | |
result[:transitions] = [{:char=>matcher,:to=>finishstate}] | |
return result | |
end | |
end | |
#run NFA | |
def make_transition(char) | |
@states.each do |s| | |
s[:cur] = s[:next] | |
s[:next] = false | |
end | |
@states.select{|s| s[:cur]}.each do |state| | |
state[:transitions].each do |tran| | |
if (tran[:char] == char) | |
newstate = tran[:to] | |
activate_state newstate | |
end | |
end | |
end | |
end | |
def activate_state(state) | |
if(!state[:next]) | |
state[:next] = true | |
state[:transitions].select{|t| t[:char].nil?}.each do |t| | |
activate_state t[:to] | |
end | |
end | |
end | |
def reset | |
@states.each do |s| | |
s[:cur] = false | |
s[:next] = false | |
end | |
end | |
#debug | |
def pretty_states() | |
states = Marshal.load(Marshal.dump(@states)) | |
i = 0 | |
states.each do |s| | |
s.delete(:initial) | |
s.delete(:finish) | |
s[:id] = i | |
i = i + 1 | |
end | |
states.each do |s| | |
s[:transitions].each do |t| | |
t[:to] = t[:to][:id] | |
end | |
end | |
states | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment