p7g · August 2, 2022 13:12
diff --git a/recompose.py b/recompose.py
 """
 This is a really basic and dumb regexp engine to play with composable regular
 expressions.

 A Re object is basically just a pointer to some initial state and to some
 accepting state. These states can each point to other states, forming an NFA-ε,
 which can then be evaluated directly.

 The cool thing is that you can just combine Re objects like this:

 hello_re = literal("hello")
 world_re = literal("world")
 space_re = literal(" ") | literal("\t")
 spaces_re = space_re + space_re.closure()
 hello_world_re = hello_re + spaces_re + world_re

 assert hello_world_re.evaluate("hello \t  world")
 """

 from __future__ import annotations
 from collections import defaultdict, deque


 class _Epsilon:
 	def __repr__(self) -> str:
 		return "ε"


 epsilon = _Epsilon()


 class State:
 	def __init__(self, transitions: dict[str | _Epsilon, list[State]]) -> None:
 		self.transitions = defaultdict(list, transitions)

 	def _copy(self, accepting_state_mapping, seen=None) -> State:
 		if self is accepting_state_mapping[0]:
 			return accepting_state_mapping[1]

 		seen = seen or {}
 		copy = State({})
 		seen[self] = copy

 		for input_, next_states in self.transitions.items():
 			for next_state in next_states:
 				if next_state in seen:
 					next_state = seen[next_state]
 				else:
 					next_state = seen[next_state] = next_state._copy(accepting_state_mapping, seen)
 				copy.transitions[input_].append(next_state)

 		return copy

 	def __repr__(self) -> str:
 		return f"State({list(self.transitions.keys())})"


 class Re:
 	def __init__(self, initial_state: State, accepting_state: State):
 		self.initial_state = initial_state
 		self.accepting_state = accepting_state

 	def evaluate(self, text: str) -> bool:
 		states = {self.initial_state}

 		for c in text:
 			new_states: set[State] = set()

 			work = deque(states)
 			while work:
 				state = work.popleft()
 				if c in state.transitions:
 					new_states.update(state.transitions[c])
 				if epsilon in state.transitions:
 					work.extend(state.transitions[epsilon])

 			states = new_states

 		work = deque(states)
 		states = set()
 		while work:
 			state = work.popleft()
 			if epsilon in state.transitions:
 				work.extend(state.transitions[epsilon])
 			else:
 				states.add(state)

 		return self.accepting_state in states

 	def _copy(self) -> Re:
 		accepting_state = State({})
 		initial_state = self.initial_state._copy((self.accepting_state, accepting_state))
 		return Re(initial_state, accepting_state)

 	def __or__(self, other):
 		if isinstance(other, str):
 			other = literal(other)
 		elif isinstance(other, Re):
 			pass
 		else:
 			return NotImplemented

 		return alternation(self, other)

 	def __add__(self, other):
 		if isinstance(other, str):
 			other = literal(other)
 		elif isinstance(other, Re):
 			pass
 		else:
 			return NotImplemented

 		return concatenation(self, other)

 	def closure(self) -> Re:
 		return closure(self)


 def literal(text: str) -> Re:
 	current_state = initial_state = State({})

 	for c in text:
 		next_state = State({})
 		current_state.transitions[c] = [next_state]
 		current_state = next_state

 	return Re(initial_state, current_state)


 def alternation(a: Re, b: Re) -> Re:
 	a = a._copy()
 	b = b._copy()

 	initial_state = State(
 		{
 			epsilon: [
 				a.initial_state,
 				b.initial_state,
 			]
 		},
 	)

 	accepting_state = State({})
 	a.accepting_state.transitions[epsilon] = [accepting_state]
 	b.accepting_state.transitions[epsilon] = [accepting_state]

 	return Re(initial_state, accepting_state)


 def concatenation(a: Re, b: Re) -> Re:
 	a = a._copy()
 	b = b._copy()

 	initial_state = State({epsilon: [a.initial_state]})
 	a.accepting_state.transitions[epsilon] = [b.initial_state]

 	accepting_state = State({})
 	b.accepting_state.transitions[epsilon] = [accepting_state]

 	return Re(initial_state, accepting_state)


 def closure(a: Re) -> Re:
 	a = a._copy()

 	accepting_state = State({})
 	initial_state = State({epsilon: [a.initial_state, accepting_state]})

 	a.accepting_state.transitions[epsilon] = [accepting_state, a.initial_state]

 	return Re(initial_state, accepting_state)
	"""
	This is a really basic and dumb regexp engine to play with composable regular
	expressions.

	A Re object is basically just a pointer to some initial state and to some
	accepting state. These states can each point to other states, forming an NFA-ε,
	which can then be evaluated directly.

	The cool thing is that you can just combine Re objects like this:

	hello_re = literal("hello")
	world_re = literal("world")
	space_re = literal(" ") \| literal("\t")
	spaces_re = space_re + space_re.closure()
	hello_world_re = hello_re + spaces_re + world_re

	assert hello_world_re.evaluate("hello \t world")
	"""

	from __future__ import annotations
	from collections import defaultdict, deque


	class _Epsilon:
	def __repr__(self) -> str:
	return "ε"


	epsilon = _Epsilon()


	class State:
	def __init__(self, transitions: dict[str \| _Epsilon, list[State]]) -> None:
	self.transitions = defaultdict(list, transitions)

	def _copy(self, accepting_state_mapping, seen=None) -> State:
	if self is accepting_state_mapping[0]:
	return accepting_state_mapping[1]

	seen = seen or {}
	copy = State({})
	seen[self] = copy

	for input_, next_states in self.transitions.items():
	for next_state in next_states:
	if next_state in seen:
	next_state = seen[next_state]
	else:
	next_state = seen[next_state] = next_state._copy(accepting_state_mapping, seen)
	copy.transitions[input_].append(next_state)

	return copy

	def __repr__(self) -> str:
	return f"State({list(self.transitions.keys())})"


	class Re:
	def __init__(self, initial_state: State, accepting_state: State):
	self.initial_state = initial_state
	self.accepting_state = accepting_state

	def evaluate(self, text: str) -> bool:
	states = {self.initial_state}

	for c in text:
	new_states: set[State] = set()

	work = deque(states)
	while work:
	state = work.popleft()
	if c in state.transitions:
	new_states.update(state.transitions[c])
	if epsilon in state.transitions:
	work.extend(state.transitions[epsilon])

	states = new_states

	work = deque(states)
	states = set()
	while work:
	state = work.popleft()
	if epsilon in state.transitions:
	work.extend(state.transitions[epsilon])
	else:
	states.add(state)

	return self.accepting_state in states

	def _copy(self) -> Re:
	accepting_state = State({})
	initial_state = self.initial_state._copy((self.accepting_state, accepting_state))
	return Re(initial_state, accepting_state)

	def __or__(self, other):
	if isinstance(other, str):
	other = literal(other)
	elif isinstance(other, Re):
	pass
	else:
	return NotImplemented

	return alternation(self, other)

	def __add__(self, other):
	if isinstance(other, str):
	other = literal(other)
	elif isinstance(other, Re):
	pass
	else:
	return NotImplemented

	return concatenation(self, other)

	def closure(self) -> Re:
	return closure(self)


	def literal(text: str) -> Re:
	current_state = initial_state = State({})

	for c in text:
	next_state = State({})
	current_state.transitions[c] = [next_state]
	current_state = next_state

	return Re(initial_state, current_state)


	def alternation(a: Re, b: Re) -> Re:
	a = a._copy()
	b = b._copy()

	initial_state = State(
	{
	epsilon: [
	a.initial_state,
	b.initial_state,
	]
	},
	)

	accepting_state = State({})
	a.accepting_state.transitions[epsilon] = [accepting_state]
	b.accepting_state.transitions[epsilon] = [accepting_state]

	return Re(initial_state, accepting_state)


	def concatenation(a: Re, b: Re) -> Re:
	a = a._copy()
	b = b._copy()

	initial_state = State({epsilon: [a.initial_state]})
	a.accepting_state.transitions[epsilon] = [b.initial_state]

	accepting_state = State({})
	b.accepting_state.transitions[epsilon] = [accepting_state]

	return Re(initial_state, accepting_state)


	def closure(a: Re) -> Re:
	a = a._copy()

	accepting_state = State({})
	initial_state = State({epsilon: [a.initial_state, accepting_state]})

	a.accepting_state.transitions[epsilon] = [accepting_state, a.initial_state]

	return Re(initial_state, accepting_state)