dmitmel · November 15, 2024 15:29
diff --git a/elac_project_1.py b/elac_project_1.py
 from collections import OrderedDict
 from dataclasses import dataclass, field
 from enum import Enum
 import random
 import sys
 import typing as T

 import graphviz


 EPSILON = "\u03b5"
 EMPTY_SET = "\u2205"
 CENTERED_ASTERISK = "\u2217"
 THIN_SPACE = "\u2009"
 HAIR_SPACE = "\u200a"
 FONT_SIZE = 12
 PLUS_IS_UNION = True
 RENDER_GRAPHS = False


 def add_visual_attrs_to_graph(
  graph: graphviz.Digraph | graphviz.Graph, font_size: int = FONT_SIZE
 ) -> None:
  # <https://tex.stackexchange.com/a/642722>
  # <https://www.overleaf.com/learn/latex/Questions%2FWhich_OTF_or_TTF_fonts_are_supported_via_fontspec%3F#Serif_fonts>
  font_attrs = {"fontsize": f"{font_size}pt", "fontname": "CMU Serif"}
  graph.attr("graph", font_attrs, margin="0")
  graph.attr("node", font_attrs)
  graph.attr("edge", font_attrs)


 def format_symbol(text: str):
  return f"<I>{text}</I>" if not text.isnumeric() else text


 @dataclass
 class RegexAtom:
  start_pos: int
  end_pos: int
  grouped: bool = field(default=False, init=False)


 @dataclass
 class RegexSequence(RegexAtom):
  atoms: list[RegexAtom]


 @dataclass
 class RegexUnion(RegexAtom):
  atoms: list[RegexAtom]


 @dataclass
 class RegexRepetition(RegexAtom):
  atom: RegexAtom
  zero: bool


 @dataclass
 class RegexSymbol(RegexAtom):
  character: str


 class RegexParser:
  # expression      ::=  union ;
  # union           ::=  sequence ( "|" sequence )* ;
  # sequence        ::=  ( qualified-atom )* ;
  # qualified-atom  ::=  atom ( "*" | "+" )? ;
  # atom            ::=  symbol | group ;
  # group           ::=  "(" expression ")" ;

  class Token(Enum):
    GROUP_START = 1
    GROUP_END = 2
    UNION = 3
    ONE_OR_MORE = 4
    ZERO_OR_MORE = 5
    LITERAL = 6

  METACHARACTER_TOKENS: dict[str, Token] = {
    "(": Token.GROUP_START,
    ")": Token.GROUP_END,
    "|": Token.UNION if not PLUS_IS_UNION else Token.LITERAL,
    "+": Token.ONE_OR_MORE if not PLUS_IS_UNION else Token.UNION,
    "*": Token.ZERO_OR_MORE,
  }

  def __init__(self, text: str) -> None:
    self.text = text
    self.pos = 0

  def parse(self) -> RegexAtom:
    atom = self.parse_union()
    if self.pos != len(self.text):
      raise Exception("Unexpected input")
    return atom

  def parse_union(self) -> RegexUnion | RegexAtom:
    start_pos = self.pos
    atoms: list[RegexAtom] = [self.parse_sequence()]
    while self.match(self.Token.UNION):
      atoms.append(self.parse_sequence())
    return RegexUnion(start_pos, self.pos, atoms) if len(atoms) > 1 else atoms[0]

  def parse_sequence(self) -> RegexSequence | RegexAtom:
    start_pos = self.pos
    atoms: list[RegexAtom] = []
    while atom := self.parse_quantified_atom():
      atoms.append(atom)
    return RegexSequence(start_pos, self.pos, atoms) if len(atoms) != 1 else atoms[0]

  def parse_quantified_atom(self) -> RegexRepetition | RegexAtom | None:
    start_pos = self.pos
    atom = self.parse_atom()
    if char := self.match(self.Token.ZERO_OR_MORE, self.Token.ONE_OR_MORE):
      if atom is None:
        raise Exception("Nothing to repeat")
      return RegexRepetition(start_pos, self.pos, atom, zero=(char == "*"))
    else:
      return atom

  def parse_atom(self) -> RegexSymbol | RegexAtom | None:
    start_pos = self.pos
    if matched_char := self.match(self.Token.LITERAL):
      return RegexSymbol(start_pos, self.pos, matched_char)
    elif self.match(self.Token.GROUP_START):
      atom = self.parse_union()
      atom.grouped = True
      if not self.match(self.Token.GROUP_END):
        raise Exception("Expected ')'")
      return atom
    else:
      return None

  def match(self, *expected_tokens: Token) -> str | None:
    if 0 <= self.pos < len(self.text):
      char = self.text[self.pos]
      token = self.METACHARACTER_TOKENS.get(char, self.Token.LITERAL)
      if token in expected_tokens:
        self.pos += 1
        return char
    return None


 @dataclass
 class NFAState:
  name: int
  chain_group: int


 @dataclass
 class NFATransition:
  from_state: NFAState
  to_state: NFAState
  symbol: str
  atom_type: type[RegexAtom]


 class _Sortable(T.Protocol):
  def __lt__(self, other: T.Any, /) -> bool: ...


 _State = T.TypeVar("_State", bound=_Sortable)
 _Symbol = T.TypeVar("_Symbol", bound=_Sortable)


 class DFA(T.Generic[_State, _Symbol]):
  __slots__ = "_transition_table", "_symbols", "_states", "_starting_state", "_final_states"

  def __init__(self) -> None:
    self._transition_table = dict[tuple[_State, _Symbol], _State]()
    self._symbols = set[_Symbol]()
    self._states = set[_State]()
    self._starting_state: _State | None = None
    self._final_states = set[_State]()

  @property
  def symbols(self) -> T.AbstractSet[_Symbol]:
    return self._symbols

  @property
  def states(self) -> T.AbstractSet[_State]:
    return self._states

  @property
  def starting_state(self) -> _State | None:
    return self._starting_state

  @property
  def final_states(self) -> T.AbstractSet[_State]:
    return self._final_states

  def add_symbol(self, symbol: _Symbol) -> None:
    assert symbol not in self._symbols
    self._symbols.add(symbol)

  def add_symbols(self, symbols: T.Iterable[_Symbol]) -> None:
    assert self._symbols.isdisjoint(symbols)
    self._symbols.update(symbols)

  def add_state(self, state: _State, starting: bool = False, final: bool = False) -> None:
    assert state not in self._states
    assert not starting or self._starting_state is None
    self._states.add(state)
    if starting:
      self._starting_state = state
    if final:
      self._final_states.add(state)

  def is_starting_state(self, state: _State) -> bool:
    assert state in self._states
    return state == self._starting_state

  def is_final_state(self, state: _State) -> bool:
    assert state in self._states
    return state in self._final_states

  def add_transition(self, state: _State, symbol: _Symbol, next_state: _State) -> None:
    assert state in self._states
    assert symbol in self._symbols
    assert next_state in self._states
    assigned_state = self._transition_table.setdefault((state, symbol), next_state)
    assert assigned_state == next_state

  def transition(self, state: _State, symbol: _Symbol) -> _State | None:
    return self._transition_table.get((state, symbol))

  def all_transitions(self) -> T.Iterator[tuple[tuple[_State, _Symbol], _State]]:
    return iter(self._transition_table.items())

  def to_graph(self) -> graphviz.Digraph:
    graph = graphviz.Digraph()
    add_visual_attrs_to_graph(graph)
    graph.attr("graph", rankdir="LR")
    graph.node("START", shape="plaintext")
    graph.attr("node", shape="circle", margin="0.08,0.04", width="0", height="0")

    for state in sorted(self._states):
      graph.node(str(state), shape="doublecircle" if state in self.final_states else None)

    if self.starting_state is not None:
      graph.edge("START", str(self.starting_state))

    grouped_transitions = dict[tuple[_State, _State], list[_Symbol]]()
    for (state, symbol), next_state in self._transition_table.items():
      grouped_transitions.setdefault((state, next_state), []).append(symbol)

    for (state, next_state), symbols in sorted(grouped_transitions.items()):
      label = "<" + ",".join(map(format_symbol, map(str, sorted(symbols)))) + ">"
      graph.edge(str(state), str(next_state), label)

    return graph


 class RE2NFA:
  def __init__(self, atom: RegexAtom) -> None:
    self.states = list[NFAState]()
    self.transitions = list[NFATransition]()
    self.current_state_id = 0
    self.current_chain_group = 0

    self.start_state = self.add_state(self.current_chain_group)
    self.final_state = self.add_state(self.current_chain_group)

    self.name_state(self.start_state)
    self.convert(atom, self.start_state, self.final_state, self.current_chain_group)
    self.name_state(self.final_state)

  def convert(
    self, atom: RegexAtom, link_start: NFAState, link_end: NFAState, chain_group: int
  ) -> None:
    if isinstance(atom, RegexSymbol):
      self.add_transition(link_start, link_end, atom.character, RegexSymbol)

    elif isinstance(atom, RegexSequence):
      if len(atom.atoms) == 0:
        self.add_transition(link_start, link_end, EPSILON, RegexSequence)
      else:
        link_prev = link_start
        for subatom in atom.atoms[:-1]:
          link_next = self.add_state(chain_group)
          self.convert(subatom, link_prev, link_next, chain_group)
          self.name_state(link_next)
          link_prev = link_next
        self.convert(atom.atoms[-1], link_prev, link_end, chain_group)

    elif isinstance(atom, RegexUnion):
      for subatom in atom.atoms:
        self.current_chain_group += 1
        sub_chain_group = self.current_chain_group
        sub_link_start = self.add_state(sub_chain_group)
        sub_link_end = self.add_state(sub_chain_group)
        self.add_transition(link_start, sub_link_start, EPSILON, RegexUnion)
        self.add_transition(sub_link_end, link_end, EPSILON, RegexUnion)
        self.name_state(sub_link_start)
        self.convert(subatom, sub_link_start, sub_link_end, sub_chain_group)
        self.name_state(sub_link_end)

    elif isinstance(atom, RegexRepetition):
      subatom = atom.atom
      if atom.zero:
        sub_link_start = self.add_state(chain_group)
        sub_link_end = self.add_state(chain_group)
        self.add_transition(link_start, sub_link_start, EPSILON, RegexSequence)
        self.add_transition(sub_link_end, link_end, EPSILON, RegexSequence)
        self.add_transition(link_start, link_end, EPSILON, RegexRepetition)
        self.add_transition(sub_link_end, sub_link_start, EPSILON, RegexRepetition)
        self.name_state(sub_link_start)
        self.convert(subatom, sub_link_start, sub_link_end, chain_group)
        self.name_state(sub_link_end)
      else:
        self.add_transition(link_end, link_start, EPSILON, RegexRepetition)
        self.convert(subatom, link_start, link_end, chain_group)

    else:
      raise Exception(f"Unexpected atom type: {type(atom).__name__}")

  def add_state(self, chain_group: int) -> NFAState:
    state = NFAState(-1, chain_group)
    self.states.append(state)
    return state

  def add_transition(
    self, from_state: NFAState, to_state: NFAState, symbol: str, kind: type[RegexAtom]
  ) -> None:
    self.transitions.append(NFATransition(from_state, to_state, symbol, kind))

  def name_state(self, state: NFAState) -> None:
    state.name = self.current_state_id
    self.current_state_id += 1

  def find_state(self, name: int) -> NFAState | None:
    for state in self.states:
      if state.name == name:
        return state
    return None

  def to_graph(self, hide_state_labels: bool = False) -> graphviz.Digraph:
    graph = graphviz.Digraph()
    add_visual_attrs_to_graph(graph, font_size=20)
    graph.attr("graph", rankdir="LR")

    if hide_state_labels:
      graph.node("START", shape="none", label="", width="0", height="0")
    else:
      graph.node("START", shape="plaintext")

    graph.attr("node", shape="circle", margin="0.04,0.02", width="0", height="0")
    if hide_state_labels:
      graph.attr("node", label=" ")

    for state in self.states:
      graph.node(
        str(state.name),
        shape="doublecircle" if state == self.final_state else None,
        group=str(state.chain_group),
      )

    for transition in self.transitions:
      graph.edge(
        str(transition.from_state.name),
        str(transition.to_state.name),
        "<" + format_symbol(transition.symbol) + ">",
        constraint="false" if transition.atom_type is RegexRepetition else None,
        # weight="0" if transition.atom_type is RegexUnion else None,
      )

    graph.edge("START", str(self.start_state.name))

    return graph


 class NFA2DFA:
  def __init__(self, nfa: RE2NFA) -> None:
    self.nfa_start_state: int = nfa.start_state.name
    self.nfa_final_state: int = nfa.final_state.name

    self.nfa_transition_table = dict[tuple[int, str], list[int]]()
    for transition in nfa.transitions:
      key = (transition.from_state.name, transition.symbol)
      self.nfa_transition_table.setdefault(key, []).append(transition.to_state.name)

    self.subsets_to_names = dict[frozenset[int], str]()
    self.subset_names_generator = iter("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    self.all_symbols: list[str] = sorted(set(tr.symbol for tr in nfa.transitions) - set(EPSILON))

    self.dfa = DFA[str, str]()
    self.dfa.add_symbols(self.all_symbols)

    self.convert()

  def convert(self) -> None:
    # <https://tex.stackexchange.com/a/67540>
    # <https://tex.stackexchange.com/a/1960>
    # <https://stackoverflow.com/a/65431499/12005228>
    # print(r"{")
    # print(r"\makeatletter")
    # print(r"\def\old@comma{,}")
    # print(r"\catcode`\,=13")
    # print(r"\def,{%")
    # print(r"  \ifmmode%")
    # print(r"    \old@comma\discretionary{}{}{}%")
    # print(r"  \else%")
    # print(r"    \old@comma%")
    # print(r"  \fi%")
    # print(r"}")
    # print(r"\makeatother")
    # print()

    def subset_to_string(subset: frozenset[int]) -> str:
      if len(subset) > 0:
        return r"\{" + ",".join(map(str, subset)) + r"\}"
      else:
        return r"\varnothing"

    print(
      r"We start out by finding and naming the subset reachable from the "
      r"initial state of the NFA:"
    )

    starting_subset = frozenset([self.nfa_start_state])
    print(r"\[ \epscl(%s) = " % subset_to_string(starting_subset), end="")
    starting_subset = self.epsilon_closure(starting_subset)
    _, starting_subset_name = self.name_subset(starting_subset)
    print(r"%s = %s \]" % (subset_to_string(starting_subset), starting_subset_name))

    print(
      r"Next, we will try finding other reachable subsets by moving from the "
      r"known subsets via transitions on all available symbols, until we cannot "
      "assign any more new names."
    )

    queue = [(starting_subset_name, starting_subset)]
    while len(queue) > 0:
      subset_name, subset = queue.pop()

      for symbol in self.all_symbols:
        print(r"\[ \epscl(\move(%s, %s)) = " % (subset_name, symbol), end="")
        new_subset = self.move_function(subset, symbol)
        print(r"\epscl(%s) = " % (subset_to_string(new_subset)), end="")
        new_subset = self.epsilon_closure(new_subset)
        print(subset_to_string(new_subset), end="")
        if len(new_subset) == 0:
          print(r" \]")
          continue
        was_unnamed, new_subset_name = self.name_subset(new_subset)
        print(r" = %s \]" % new_subset_name)
        self.dfa.add_transition(subset_name, symbol, new_subset_name)
        if was_unnamed:
          print(r"\( %s \) is a new subset." % new_subset_name)
          if self.nfa_final_state in new_subset:
            print(r"It also contains the final state.")
          queue.append((new_subset_name, new_subset))

    # print(r"}")
    print()
    print(r"At this point we have discovered and named all unique reachable subsets.")

    # self.dfa.add_state("Z")
    # for symbol in self.all_symbols:
    #   self.dfa.add_transition("Z", symbol, "Z")
    # for state in self.dfa.states:
    #   for symbol in self.all_symbols:
    #     if self.dfa.transition(state, symbol) is None:
    #       print(state, symbol, file=sys.stderr)
    #       self.dfa.add_transition(state, symbol, "Z")

  def move_function(self, starting_subset: frozenset[int], symbol: str) -> frozenset[int]:
    reachable = set[int]()
    for state in starting_subset:
      transitions = self.nfa_transition_table.get((state, symbol), [])
      reachable.update(transitions)
    return frozenset(reachable)

  def epsilon_closure(self, starting_subset: frozenset[int]) -> frozenset[int]:
    queue = list(starting_subset)
    visited = set(starting_subset)

    while len(queue) > 0:
      state = queue.pop()
      transitions = self.nfa_transition_table.get((state, EPSILON), [])
      for next_state in transitions:
        if next_state not in visited:
          visited.add(next_state)
          queue.append(next_state)

    return frozenset(visited)

  def name_subset(self, subset: frozenset[int]) -> tuple[bool, str]:
    assert len(subset) > 0
    name = self.subsets_to_names.get(subset)
    if name is None:
      name = next(self.subset_names_generator)
      self.subsets_to_names[subset] = name
      self.dfa.add_state(
        name, starting=self.nfa_start_state in subset, final=self.nfa_final_state in subset
      )
      return True, name
    else:
      return False, name


 def minimize_dfa(dfa: DFA[_State, _Symbol]) -> DFA[_State, _Symbol]:
  marked_pairs = set[tuple[_State, _State]]()
  unmarked_pairs = set[tuple[_State, _State]]()

  dfa_states = sorted(dfa.states)
  dfa_symbols = sorted(dfa.symbols)

  def is_pair_marked(state1: _State, state2: _State) -> bool:
    return (state1, state2) in marked_pairs or (state2, state1) in marked_pairs

  def print_distinguishable_table() -> None:
    print(r"\begin{tabular}{|r" + "|c" * len(dfa_states) + "|}")
    print(r"\hline")
    print("& " + " & ".join(map(str, dfa_states)) + " \\\\")
    for i, state1 in enumerate(dfa_states):
      print(r"\hline")
      marks = list[str]()
      for j, state2 in enumerate(dfa_states):
        if i == j:
          marks.append(r"\cellcolor{lightgray}{--}")
        elif is_pair_marked(state1, state2):
          marks.append(r"$\times$")
        else:
          marks.append(" ")
      print(str(state1) + " & " + " & ".join(marks) + " \\\\")
    print(r"\hline")
    print(r"\end{tabular}")

  print(
    r"We draw a table for pairs of all states, and we will be marking every pair "
    r"where it is known that the two states are definitely distinguishable. We start "
    r"out by marking every pair that can be distinguished by the fact of only one of "
    r"the states being a final state:"
  )

  for state1 in dfa_states:
    for state2 in dfa_states:
      if state1 != state2:
        distinguishable = dfa.is_final_state(state1) != dfa.is_final_state(state2)
        pair = (state1, state2) if state1 < state2 else (state2, state1)
        (marked_pairs if distinguishable else unmarked_pairs).add(pair)

  print(r"\begin{table}[H]")
  print(r"\centering")
  print_distinguishable_table()
  print(r"\caption{Distinguishable states.}")
  print(r"\end{table}")
  print()

  print(
    r"Next, we will repeatedly check all pairs that have remained unmarked. A pair "
    r"can be marked if a transition on any symbol leads to a pair that is already "
    r"known to be distinguishable (therefore this property is transitive)."
  )
  print(r"\begin{enumerate}")

  while True:
    marked_any = False

    for pair in sorted(unmarked_pairs):
      state1, state2 = pair
      for symbol in dfa_symbols:
        next_state1 = dfa.transition(state1, symbol)
        next_state2 = dfa.transition(state2, symbol)
        pair1_text = (state1, state2)
        pair2_text = (next_state1 or r"\text{--}", next_state2 or r"\text{--}")
        print(
          r"\item \( (%s, %s) \xrightarrow{%s} (%s, %s) \)" % (*pair1_text, symbol, *pair2_text)
        )

        distinguishable = False
        if next_state1 is not None and next_state2 is not None:
          if is_pair_marked(next_state1, next_state2):
            distinguishable = True
            print(
              r" --- distinguishable because the pair \( (%s, %s) \) has been marked" % pair2_text
            )
        elif (next_state1 is None) != (next_state2 is None):
          distinguishable = True
          print(r" --- distinguishable because of missing transitions")

        if distinguishable:
          unmarked_pairs.remove(pair)
          marked_pairs.add(pair)
          marked_any = True
          break
        else:
          print(r" --- cannot say anything about this pair yet")

    if not marked_any:
      break

  print(r"\end{enumerate}")
  print()
  print(r"The final table of distinguishable pairs of states looks like:")
  print()

  print(r"\begin{table}[H]")
  print(r"\centering")
  print_distinguishable_table()
  print(r"\caption{Distinguishable states in our DFA.}")
  print(r"\end{table}")
  print()

  if len(unmarked_pairs) > 1:
    print(
      r"Some pairs have remained unmarked, which means that some states can be "
      r"merged with equivalent ones and collapsed into one state."
    )
  else:
    print(
      r"All pairs have been marked, which means that there are no equivalent states. "
      r"Our DFA cannot be minimized further."
    )
  print()

  equivalence_classes = dict[_State, set[_State]]()
  for state1, state2 in unmarked_pairs:
    new_class = set[_State]()
    class1 = equivalence_classes.setdefault(state1, new_class)
    class2 = equivalence_classes.setdefault(state2, new_class)
    if class1 is not class2:
      class1.update(class2)
      class2 = class1
      equivalence_classes[state2] = class2
    class1.add(state1)
    class2.add(state2)

  for state in dfa_states:
    equivalence_classes.setdefault(state, {state})

  new_dfa_states_mapping: dict[_State, _State] = {
    state: min(equiv_class) for state, equiv_class in equivalence_classes.items()
  }

  equivalence_classes_with_merges: list[_State] = [
    s for s in set(new_dfa_states_mapping.values()) if len(equivalence_classes[s]) > 1
  ]
  if len(equivalence_classes_with_merges) > 0:
    print("Equivalence classes are:")
    print(r"\begin{enumerate}")
    for renamed_state in equivalence_classes_with_merges:
      print(
        r"\item \( (%s) \rightarrow %s \)"
        % (", ".join(map(str, sorted(equivalence_classes[renamed_state]))), renamed_state)
      )
    print(r"\end{enumerate}")
    print()

  new_dfa = DFA[_State, _Symbol]()
  new_dfa.add_symbols(dfa.symbols)
  for renamed_state in set(new_dfa_states_mapping.values()):
    new_dfa.add_state(
      renamed_state,
      starting=dfa.starting_state in equivalence_classes[renamed_state],
      final=not dfa.final_states.isdisjoint(equivalence_classes[renamed_state]),
    )

  for (from_state, symbol), to_state in dfa.all_transitions():
    from_state = new_dfa_states_mapping[from_state]
    to_state = new_dfa_states_mapping[to_state]
    new_dfa.add_transition(from_state, symbol, to_state)

  return new_dfa


 def re2tree(atom: RegexAtom, atom_numbers: dict[int, int]) -> graphviz.Digraph:
  graph = graphviz.Digraph()
  add_visual_attrs_to_graph(graph)
  graph.attr("graph", rankdir="TB")
  graph.attr("node", shape="box", margin="0.08,0.06", width="0", height="0")

  next_atom_number = 0

  def atom_node_id(atom: RegexAtom) -> str:
    nonlocal next_atom_number
    number = atom_numbers.get(id(atom))
    if number is None:
      next_atom_number += 1
      number = next_atom_number
      atom_numbers[id(atom)] = number
    return str(number)

  def atom_label(atom: RegexAtom) -> str:
    return f"<I>R</I><SUB>{atom_node_id(atom)}</SUB>"

  def atom_refer(atom: RegexAtom) -> str:
    return f"({HAIR_SPACE}{atom_label(atom)}{HAIR_SPACE})" if atom.grouped else atom_label(atom)

  def recurse(atom: RegexAtom) -> None:
    node_id = atom_node_id(atom)
    atom_def: str
    subatoms: T.Iterable[RegexAtom]

    if isinstance(atom, RegexSymbol):
      subatoms = []
      atom_def = format_symbol(atom.character)
    elif isinstance(atom, RegexRepetition):
      subatoms = [atom.atom]
      atom_def = atom_refer(atom.atom) + THIN_SPACE + (CENTERED_ASTERISK if atom.zero else "+")
    elif isinstance(atom, RegexUnion):
      subatoms = atom.atoms
      joiner = " + " if PLUS_IS_UNION else " | "
      atom_def = joiner.join(map(atom_refer, atom.atoms)) if atom.atoms else format_symbol(EPSILON)
    elif isinstance(atom, RegexSequence):
      subatoms = atom.atoms
      atom_def = " ".join(map(atom_refer, atom.atoms)) if atom.atoms else format_symbol(EPSILON)
    else:
      raise Exception(f"Unexpected atom type: {type(atom).__name__}")

    graph.node(node_id, f"<{atom_label(atom)} = {atom_def}>")
    for subatom in subatoms:
      recurse(subatom)
    for subatom in subatoms:
      graph.edge(node_id, atom_node_id(subatom))

  recurse(atom)
  return graph


 def regex_construction_steps(
  regex_text: str, root_atom: RegexAtom, atom_numbers: dict[int, int]
 ) -> None:
  steps = OrderedDict[str, list[RegexAtom]]()

  def recurse(atom: RegexAtom) -> None:
    subatoms: T.Iterable[RegexAtom]
    if isinstance(atom, RegexSymbol):
      subatoms = []
    elif isinstance(atom, RegexRepetition):
      subatoms = [atom.atom]
    elif isinstance(atom, RegexSequence):
      subatoms = atom.atoms
    elif isinstance(atom, RegexUnion):
      subatoms = atom.atoms
    else:
      raise Exception(f"Unexpected atom type: {type(atom).__name__}")

    for subatom in subatoms:
      recurse(subatom)

    atom_text = regex_text[atom.start_pos : atom.end_pos]
    steps.setdefault(atom_text, []).append(atom)

  recurse(root_atom)

  for i, (atom_text, similar_atoms) in enumerate(steps.items()):
    step = i + 1
    graph = RE2NFA(similar_atoms[0]).to_graph(hide_state_labels=True)
    print(r"\begin{figure}[H]")
    print(r"\centering")
    print(r"\digraph[width=\maxwidth]{step%s}{" % step)
    print("".join(graph.body), end="")
    print(r"}")
    atom_references = " = ".join("R_{%s}" % atom_numbers[id(atom)] for atom in similar_atoms)
    print(
      r"\caption{NFA construction step %s, \( %s = \text{``\texttt{%s}''} \).}"
      % (step, atom_references, atom_text)
    )
    print(r"\end{figure}")
    print()


 def random_regex_example_string(atom: RegexAtom, shortest: bool = False) -> str:
  def recurse(atom: RegexAtom) -> str:
    if isinstance(atom, RegexSymbol):
      return atom.character
    elif isinstance(atom, RegexSequence):
      return "".join(recurse(a) for a in atom.atoms)
    elif isinstance(atom, RegexRepetition):
      result = "" if atom.zero else recurse(atom.atom)
      if not shortest:
        while bool(random.randint(0, 1)):
          result += recurse(atom.atom)
      return result
    elif isinstance(atom, RegexUnion):
      if not shortest:
        return recurse(random.choice(atom.atoms))
      else:
        return min(map(recurse, atom.atoms), key=len)
    else:
      raise Exception(f"Unexpected atom type: {type(atom).__name__}")

  return recurse(atom)


 def print_dfa_transitions(dfa: DFA[str, str]) -> None:
  symbols = sorted(dfa.symbols)
  print(r"\begin{tabular}{|r" + "|c" * len(symbols) + "|}")
  print(r"\hline")
  print("& " + " & ".join(f"${s}$" for s in symbols) + " \\\\")
  for state in sorted(dfa.states):
    transitions = (dfa.transition(state, symbol) or "--" for symbol in symbols)
    marker = ""
    if dfa.is_starting_state(state):
      marker += r"$\rightarrow$ "
    if dfa.is_final_state(state):
      marker += r"$*$ "
    print(r"\hline")
    print(marker + state + " & " + " & ".join(transitions) + " \\\\")
  print(r"\hline")
  print(r"\end{tabular}")


 def check_dfa(dfa: DFA[str, str], example: str) -> str:
  current_state = dfa.starting_state
  assert current_state is not None
  path: list[str] = [current_state]
  for c in example:
    current_state = dfa.transition(current_state, c)
    path.append(r"\xrightarrow{%s}" % c)
    if current_state is not None:
      path.append(current_state)
    else:
      path.append(r"\varnothing")
      break
  accepted = current_state is not None and dfa.is_final_state(current_state)
  status = "accepted" if accepted else "rejected"
  return r"``\texttt{%s}'': \( %s \Rightarrow \text{%s} \)" % (example, " ".join(path), status)


 def main() -> None:
  [_, author, regex_text, *argv_examples] = sys.argv

  print(r"\documentclass[%spt,a4paper,titlepage,fleqn]{article}" % FONT_SIZE)
  print(r"\usepackage[margin=3cm]{geometry}")
  print(r"\usepackage[utf8]{inputenc}")
  print(r"\usepackage[english]{babel}")
  print(r"\usepackage{parskip}")
  print(r"\usepackage{graphicx}")
  print(r"\usepackage[table]{xcolor}")
  print(r"\usepackage[colorlinks=true,allcolors=blue]{hyperref}")
  print(r"\usepackage{amsmath}")
  print(r"\usepackage{amssymb}")
  print(r"\usepackage{mathtools}")
  # print(r"\usepackage[euler]{textgreek}")
  print(r"\usepackage[Euler]{upgreek}")
  print(r"\usepackage{float}")
  print(r"\usepackage{shellesc}")
  print(r"\usepackage[pdf,singlefile]{graphviz}")
  print(r"\usepackage{xpatch}")
  print()

  # print(r"\usepackage{tikz}")
  # print(r"\usetikzlibrary{automata,positioning,graphs,graphdrawing}")
  # print(r"\usegdlibrary{force}")

  # print(r"\newcommand{\acceptingdoubledistance}{2pt}")
  # # <https://github.com/pgf-tikz/pgf/blob/3.1.10/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibraryautomata.code.tex#L35>
  # print(
  #   r"\tikzset{accepting by double/.style={double distance=\acceptingdoubledistance,outer sep=.5\pgflinewidth+.5\acceptingdoubledistance}}"
  # )

  # Taken from <https://www.overleaf.com/latex/templates/graphviz-drawing-example/xhrpxynbfxvw>
  print(r"\makeatletter")
  print(r"\newcommand*{\addGraphFileDependency}[1]{%")
  print(r"  \typeout{(#1)} \@addtofilelist{#1} \IfFileExists{#1}{}{\typeout{No file #1.}} }")
  print(r"\makeatother")
  print(r"\xpretocmd{\digraph}{\addGraphFileDependency{#2.dot}}{}{}")
  print()

  # <https://davidcarlisle.github.io/uk-tex-faq/FAQ-grmaxwidth.html>
  # <https://tex.stackexchange.com/a/86355>
  print(r"\makeatletter")
  print(r"\newcommand{\maxwidth}{%")
  print(r"  \ifdim \Gin@nat@width>\linewidth \linewidth \else \Gin@nat@width \fi}")
  print(r"\makeatother")
  print()

  print(r"\DeclareMathOperator{\move}{move}")
  print(r"\DeclareMathOperator{\epscl}{\upvarepsilon-closure}")
  print()

  print(r"\title{ELAC Project 1 \\ ~ \\ Regular Expression ``\texttt{%s}''}" % regex_text)
  print(r"\author{%s}" % author)
  print(r"\date{\today}")
  print()

  print(r"\begin{document}")
  print()

  print(r"\maketitle")
  print()

  parser = RegexParser(regex_text)
  regex = parser.parse()

  print(r"\section{Example strings} \label{sec:examples}")
  print()

  examples = set[str](argv_examples)
  attempt = 0
  while len(examples) < 5 and attempt < 1000:
    examples.add(random_regex_example_string(regex))
    attempt += 1
  examples_sorted = sorted(examples, key=lambda s: (len(s), s))

  print(r"Here are some examples of strings accepted by our regular expression:")
  print(r"\begin{enumerate}")
  for s in examples_sorted:
    print(r"\item ``\texttt{%s}''" % s)
  print(r"\end{enumerate}")
  print()

  print(r"\section{Construction of an NFA}")
  print()

  atom_numbers = dict[int, int]()
  tree = re2tree(regex, atom_numbers)
  if RENDER_GRAPHS:
    tree.render("elac_project_1_regex.gv", format="png", cleanup=True)

  print(r"\begin{figure}[H]")
  print(r"\centering")
  print(r"\digraph[width=\maxwidth]{regex}{")
  print("".join(tree.body), end="")
  print(r"}")
  print(
    r"\caption{Decomposition of our regular expression into elementary "
    r"sub-expressions $R_{i}$, presented as a tree in order to make the "
    r"relations between the operators and sub-expressions easier to follow.}"
  )
  print(r"\end{figure}")
  print()

  print(
    r"To build an NFA with $\upvarepsilon$-transitions equivalent to our regular "
    r"expression, we will apply \textbf{Thompson's construction algorithm}. "
    r"Construction steps of similar sub-expressions $R_{i}$ have been combined together."
  )
  print()

  regex_construction_steps(regex_text, regex, atom_numbers)

  nfa = RE2NFA(regex)
  if RENDER_GRAPHS:
    nfa.to_graph().render("elac_project_2_nfa.gv", format="png", cleanup=True)

  print(r"Finally, to complete the NFA we assign numbers to all states as their names.")
  print()

  print(r"\begin{figure}[H]")
  print(r"\centering")
  print(r"\digraph[width=\maxwidth]{nfa}{")
  print("".join(nfa.to_graph().body), end="")
  print(r"}")
  print(r"\caption{Diagram of the completed NFA.}")
  print(r"\end{figure}")
  print()

  print(r"\section{Transformation of the NFA into a DFA}")
  print()

  print(
    r"The $\upvarepsilon$-NFA is transformed into an equivalent DFA using the "
    r"\textbf{subset construction algorithm}."
  )
  dfa = NFA2DFA(nfa).dfa
  if RENDER_GRAPHS:
    dfa.to_graph().render("elac_project_3_dfa.gv", format="png", cleanup=True)

  print(
    r"We can use the equivalences established by "
    r"\( \epscl(\move(subset, symbol)) = next \: subset \) "
    r"to build a transition table for a DFA, whose states will correspond one-to-one "
    r"to the reachable subsets of the $\upvarepsilon$-NFA and be named accordingly. "
    r"The transitions which result in an empty set $\varnothing$ will be filled simply "
    "with blanks in the table."
  )
  print()

  print(r"\begin{table}[H]")
  print(r"\centering")
  print_dfa_transitions(dfa)
  print(
    r"\caption{Transition table of our DFA. The starting state is marked with "
    r"$\rightarrow$, and the final state%s %s marked with $*$.}"
    % ("s" if len(dfa.final_states) != 1 else "", "are" if len(dfa.final_states) != 1 else "is")
  )
  print(r"\label{tab:dfa}")
  print(r"\end{table}")
  print()

  print(r"\begin{figure}[H]")
  print(r"\centering")
  print(r"\digraph[width=\maxwidth]{dfa}{")
  print("".join(dfa.to_graph().body), end="")
  print(r"}")
  print(r"\caption{Diagram of the completed DFA, constructed from table~\ref{tab:dfa}.}")
  print(r"\end{figure}")
  print()

  print(r"Let us try checking our DFA with the example strings from section~\ref{sec:examples}:")
  print(r"\begin{enumerate}")
  for example in examples_sorted:
    print(r"\item %s" % check_dfa(dfa, example))
  print(r"\end{enumerate}")
  print()

  print(r"\section{Construction of a minimized DFA}")
  print()
  print(r"DFA minimization is performed using the \textbf{table-filling algorithm}.")

  mindfa = minimize_dfa(dfa)
  if RENDER_GRAPHS:
    mindfa.to_graph().render("elac_project_4_mindfa.gv", format="png", cleanup=True)

  print(r"\begin{table}[H]")
  print(r"\centering")
  print_dfa_transitions(mindfa)
  print(
    r"\caption{Transition table of our minimized DFA. The starting state is "
    r"marked with $\rightarrow$, and the final state%s %s marked with $*$.}"
    % (
      "s" if len(mindfa.final_states) != 1 else "",
      "are" if len(mindfa.final_states) != 1 else "is",
    )
  )
  print(r"\label{tab:mindfa}")
  print(r"\end{table}")
  print()

  print(r"\begin{figure}[H]")
  print(r"\centering")
  print(r"\digraph[width=\maxwidth]{mindfa}{")
  print("".join(mindfa.to_graph().body), end="")
  print(r"}")
  print(r"\caption{Diagram of the minimized DFA, constructed from table~\ref{tab:mindfa}.}")
  print(r"\end{figure}")
  print()

  print(
    r"Let us try checking the minimized DFA with example strings from "
    r"section~\ref{sec:examples} as well:"
  )
  print(r"\begin{enumerate}")
  for example in examples_sorted:
    print(r"\item %s" % check_dfa(mindfa, example))
  print(r"\end{enumerate}")
  print()

  print(r"\end{document}")


 if __name__ == "__main__":
  main()