Julien00859 · November 12, 2018 00:36
diff --git a/.gitignore b/.gitignore
 *.pyc
 __pycache__
diff --git a/__init__.py b/__init__.py
diff --git a/__main__.py b/__main__.py
 from sys import argv, exit
 from finite_automaton import *
 from parser import *

 def main():
    global auto
    # re = "a*b(c|d)ef"

    n0 = Node()
    n1 = Node("f", n0)
    n2 = Node("e", n1)
    n3 = Node("e", n1)
    n4 = Node("c", n3, "d", n2)
    n5 = Node("b", n4)
    n5.add("a", n5)

    auto = Automaton(n5)
    auto.show_tree()
    match = auto.match(" ".join(argv[1:]))
    if match:
        print("match")
    else:
        print("no match")

    auto = Automaton(groups(r"a.c"))

    for re in [r"ab|cd|e", r"ab*c", r"a.c", r"a|,", r"a*|b*"]:
        print()
        print(re)
        auto = groups(re)
        auto.show_tree()

 if __name__ == "__main__":
    main()
diff --git a/README.md b/README.md
diff --git a/finite_automaton.py b/finite_automaton.py
 from abc import abstractmethod
 from collections import defaultdict
 from itertools import chain, count

 SIGMA = object()
 class Node:
    count = 0
    def __init__(self, *pairs):
        self.nexts = defaultdict(list)
        self.prevs = defaultdict(list)

        ipairs = iter(pairs)
        for character, node in zip(ipairs, ipairs):
            self.add(character, node)

        self.id = Node.count
        Node.count += 1

    def add(self, *pairs):
        ipairs = iter(pairs)
        for character, node in zip(ipairs, ipairs):
            self.nexts[character].append(node)
            node.prevs[character].append(self)

    def rem(self, *pairs):
        ipairs = iter(pairs)
        for character, node in zip(ipairs, ipairs):
            self.nexts[character].remove(node)
            node.prevs[character].remove(self)

    def replace(self, node, character=None):
        if character is None:
            keys = list(self.prevs.keys())
            if len(keys) != 1:
                raise ValueError("Cannot determine previous transition to replace")
            character = keys[0]

        for prev_node in self.prevs[character]:
            prev_node.rem(character, self)
            prev_node.add(character, node)

    def read(self, character):
        return self.nexts.get(character, self.nexts.get(SIGMA, []))

    @property
    def is_terminal(self):
        return len(self.nexts) == 0

    def __repr__(self):
        return "<Node #{}>".format(self.id)

    def __str__(self):
        lines = []
        if not self.nexts:
            return "        ({:2d})→".format(self.id)
        if not self.prevs or all(nodes == [self] for nodes in self.prevs.values()):
            lines.append("→({:2d})".format(self.id))
        for char, nodes in self.nexts.items():
            for node in nodes:
                lines.append(" ({:2d}) {} ({:2d})".format(
                    self.id,
                    (char or "ε") if char is not SIGMA else "Σ",
                    node.id))
        return "\n".join(lines)

 class Automaton:
    def __init__(self, initial_node):
        self.initial_node = initial_node

    def match(self, string):
        nodes = [self.initial_node]
        for char in string:
            next_nodes = []
            for node in nodes:
                next_nodes.extend(node.read(char))
            if not next_nodes:
                return False
            nodes = next_nodes
        for node in nodes:
            if node.is_terminal:
                return True
        return False

    def show_tree(self):
        seen = set()
        show = [self.initial_node]
        while show:
            next_nodes = set()
            for node in show:
                print(node)
                seen.add(node)
                next_nodes = next_nodes.union(*node.nexts.values())
            show = next_nodes - seen

    @classmethod
    def determine(class_, initial_node):
        raise NotImplementedError()
diff --git a/parser.py b/parser.py
 r"""

 Minimal regexp language:

 . : any character
 | : or
 (): group
 * : any time
 , : null
 \ : espape

 Exemple:

 aa*b*(c|,)(d|e|f) = a+b*c?[def]
 ,|0|(1(01*0)1)*
 """

 from finite_automaton import Node, Automaton, SIGMA as SIGMA_

 ESCAPE = "\\"
 OPEN = "("
 CLOSE = ")"
 OR = "|"
 KLEENE = "*"
 EPSILON = ","
 SIGMA = "."

 def groups(string):
    entry_node = Node()
    node_stack = [entry_node, Node()]
    group_stack = [[]]
    escape = False
    for letter in string:
        if escape:
            escape = False
            group_stack[-1].append(letter)
            continue

        if letter == ESCAPE:
            escape = True
        elif letter == OPEN:
            new_in = Node()
            new_out = Node()
            node_stack[-2].add("", new_in)
            new_out.add("", node_stack[-1])
            node_stack.extend([new_in, new_out])
            group_stack.append([])
        elif letter == CLOSE:
            substring = "".join(group_stack.pop())
            out = node_stack.pop()
            in_ = node_stack.pop()
            choices(substring, in_, out)
        else:
            group_stack[-1].append(letter)

    substring = "".join(group_stack.pop())
    out = node_stack.pop()
    in_ = node_stack.pop()
    choices(substring, in_, out)
    return Automaton(entry_node)

 def choices(string, in_, out):
    """
                     ┌─── a→ ───┐
    a|b|c = (0)─ → ─(1)── b→ ──(2)─ → ─(3)
                     └─── c→ ───┘
    """
    choice_stack = [[]]
    escape = False
    for letter in string:
        if escape:
            escape = False
            choice_stack[-1].append(letter)
            continue

        if letter == ESCAPE:
            escape = True
        elif letter == "|":
            choice_stack.append([])
        else:
            choice_stack[-1].append(letter)

    choice_stack = map("".join, choice_stack)
    for choice in choice_stack:
        parse(choice, in_, out)

 def parse(string, in_, out):
    """

    abc = (0)── a→ ──(1)── b→ ──(2)── c→ ──(3)

    """
    escape = False
    nodes = [in_]
    for letter in string:
        if escape:
            node = Node()
            nodes[-1].add((letter, node))
            nodes.append(node)
            escape = False
            continue

        if letter == ESCAPE:
            escape = True
        elif letter == KLEENE:
            prev_out = nodes.pop()
            prev_in = nodes.pop()
            nodes.extend(kleene(prev_in, prev_out))
        else:
            if letter == SIGMA:
                letter = SIGMA_
            elif letter == EPSILON:
                letter = ""

            node = Node()
            nodes[-1].add(letter, node)
            nodes.append(node)

    nodes[-1].replace(out, "")

 def kleene(in_, out):
    """
                    ┌── a→ ─┐
    a* = (0)── → ──(1)─ ← ─(2)── → ──(3)
          └──────────── → ────────────┘
    """
    new_out = Node()
    new_in = Node("", new_out)
    in_.replace(new_in)
    new_in.add("", in_)
    out.add("", in_, "", new_out)
    return new_in, new_out
	from sys import argv, exit
	from finite_automaton import *
	from parser import *

	def main():
	global auto
	# re = "a*b(c\|d)ef"

	n0 = Node()
	n1 = Node("f", n0)
	n2 = Node("e", n1)
	n3 = Node("e", n1)
	n4 = Node("c", n3, "d", n2)
	n5 = Node("b", n4)
	n5.add("a", n5)

	auto = Automaton(n5)
	auto.show_tree()
	match = auto.match(" ".join(argv[1:]))
	if match:
	print("match")
	else:
	print("no match")

	auto = Automaton(groups(r"a.c"))

	for re in [r"ab\|cd\|e", r"abc", r"a.c", r"a\|,", r"a\|b*"]:
	print()
	print(re)
	auto = groups(re)
	auto.show_tree()

	if __name__ == "__main__":
	main()
	from abc import abstractmethod
	from collections import defaultdict
	from itertools import chain, count

	SIGMA = object()
	class Node:
	count = 0
	def __init__(self, *pairs):
	self.nexts = defaultdict(list)
	self.prevs = defaultdict(list)

	ipairs = iter(pairs)
	for character, node in zip(ipairs, ipairs):
	self.add(character, node)

	self.id = Node.count
	Node.count += 1

	def add(self, *pairs):
	ipairs = iter(pairs)
	for character, node in zip(ipairs, ipairs):
	self.nexts[character].append(node)
	node.prevs[character].append(self)

	def rem(self, *pairs):
	ipairs = iter(pairs)
	for character, node in zip(ipairs, ipairs):
	self.nexts[character].remove(node)
	node.prevs[character].remove(self)

	def replace(self, node, character=None):
	if character is None:
	keys = list(self.prevs.keys())
	if len(keys) != 1:
	raise ValueError("Cannot determine previous transition to replace")
	character = keys[0]

	for prev_node in self.prevs[character]:
	prev_node.rem(character, self)
	prev_node.add(character, node)

	def read(self, character):
	return self.nexts.get(character, self.nexts.get(SIGMA, []))

	@property
	def is_terminal(self):
	return len(self.nexts) == 0

	def __repr__(self):
	return "<Node #{}>".format(self.id)

	def __str__(self):
	lines = []
	if not self.nexts:
	return " ({:2d})→".format(self.id)
	if not self.prevs or all(nodes == [self] for nodes in self.prevs.values()):
	lines.append("→({:2d})".format(self.id))
	for char, nodes in self.nexts.items():
	for node in nodes:
	lines.append(" ({:2d}) {} ({:2d})".format(
	self.id,
	(char or "ε") if char is not SIGMA else "Σ",
	node.id))
	return "\n".join(lines)

	class Automaton:
	def __init__(self, initial_node):
	self.initial_node = initial_node

	def match(self, string):
	nodes = [self.initial_node]
	for char in string:
	next_nodes = []
	for node in nodes:
	next_nodes.extend(node.read(char))
	if not next_nodes:
	return False
	nodes = next_nodes
	for node in nodes:
	if node.is_terminal:
	return True
	return False

	def show_tree(self):
	seen = set()
	show = [self.initial_node]
	while show:
	next_nodes = set()
	for node in show:
	print(node)
	seen.add(node)
	next_nodes = next_nodes.union(*node.nexts.values())
	show = next_nodes - seen

	@classmethod
	def determine(class_, initial_node):
	raise NotImplementedError()
	r"""

	Minimal regexp language:

	. : any character
	\| : or
	(): group
	* : any time
	, : null
	\ : espape

	Exemple:

	aab(c\|,)(d\|e\|f) = a+b*c?[def]
	,\|0\|(1(010)1)
	"""

	from finite_automaton import Node, Automaton, SIGMA as SIGMA_

	ESCAPE = "\\"
	OPEN = "("
	CLOSE = ")"
	OR = "\|"
	KLEENE = "*"
	EPSILON = ","
	SIGMA = "."

	def groups(string):
	entry_node = Node()
	node_stack = [entry_node, Node()]
	group_stack = [[]]
	escape = False
	for letter in string:
	if escape:
	escape = False
	group_stack[-1].append(letter)
	continue

	if letter == ESCAPE:
	escape = True
	elif letter == OPEN:
	new_in = Node()
	new_out = Node()
	node_stack[-2].add("", new_in)
	new_out.add("", node_stack[-1])
	node_stack.extend([new_in, new_out])
	group_stack.append([])
	elif letter == CLOSE:
	substring = "".join(group_stack.pop())
	out = node_stack.pop()
	in_ = node_stack.pop()
	choices(substring, in_, out)
	else:
	group_stack[-1].append(letter)

	substring = "".join(group_stack.pop())
	out = node_stack.pop()
	in_ = node_stack.pop()
	choices(substring, in_, out)
	return Automaton(entry_node)

	def choices(string, in_, out):
	"""
	┌─── a→ ───┐
	a\|b\|c = (0)─ → ─(1)── b→ ──(2)─ → ─(3)
	└─── c→ ───┘
	"""
	choice_stack = [[]]
	escape = False
	for letter in string:
	if escape:
	escape = False
	choice_stack[-1].append(letter)
	continue

	if letter == ESCAPE:
	escape = True
	elif letter == "\|":
	choice_stack.append([])
	else:
	choice_stack[-1].append(letter)

	choice_stack = map("".join, choice_stack)
	for choice in choice_stack:
	parse(choice, in_, out)

	def parse(string, in_, out):
	"""

	abc = (0)── a→ ──(1)── b→ ──(2)── c→ ──(3)

	"""
	escape = False
	nodes = [in_]
	for letter in string:
	if escape:
	node = Node()
	nodes[-1].add((letter, node))
	nodes.append(node)
	escape = False
	continue

	if letter == ESCAPE:
	escape = True
	elif letter == KLEENE:
	prev_out = nodes.pop()
	prev_in = nodes.pop()
	nodes.extend(kleene(prev_in, prev_out))
	else:
	if letter == SIGMA:
	letter = SIGMA_
	elif letter == EPSILON:
	letter = ""

	node = Node()
	nodes[-1].add(letter, node)
	nodes.append(node)

	nodes[-1].replace(out, "")

	def kleene(in_, out):
	"""
	┌── a→ ─┐
	a* = (0)── → ──(1)─ ← ─(2)── → ──(3)
	└──────────── → ────────────┘
	"""
	new_out = Node()
	new_in = Node("", new_out)
	in_.replace(new_in)
	new_in.add("", in_)
	out.add("", in_, "", new_out)
	return new_in, new_out