# regexen_nfae/nfa_to_regex.py

import enum
from collections import namedtuple

from regex import lit, concat, bar, star

# An NFA is a (start, accept, transitions) triple:
#   start       -- the initial state
#   accept      -- collection of accepting states
#   transitions -- dict of dicts: transitions[from_state][to_state] is the
#                  condition (a regex) for moving from_state → to_state
NFA = namedtuple('NFA', 'start accept transitions')

def copy_nfa(nfa):
	"""Return a new NFA whose transition table can be edited independently.

	The outer transitions dict and every per-state inner dict are
	duplicated. The start state, the accept collection and the condition
	objects stored in the inner dicts are shared with the original.
	"""
	duplicated = {source: row.copy() for source, row in nfa.transitions.items()}
	return NFA(nfa.start, nfa.accept, duplicated)

def remove_states(nfa):
	"""Eliminate every state other than the start state and the accept states.

	Each eliminated state is bypassed: a path A→state→B becomes a direct
	A→B transition whose condition is the concatenation of the two
	conditions, with the starred self-loop of the eliminated state (if it
	had one) in between. Where an A→B transition already exists, the old
	and the new condition are joined with bar().

	The transition dicts of the given NFA are edited in place and the
	returned NFA wraps that same outer dict; pass in a copy_nfa() copy to
	keep the original intact. The table is also printed at every step as
	debug output.
	"""
	start, accept, transitions = nfa

	# Snapshot the states to eliminate up front, since rows are deleted
	# from the table as we go
	pending = [state for state in transitions.keys() if state != start and state not in accept]

	while pending:
		# States are taken from the end of the list
		victim = pending.pop()
		print('\nRemoving state:', victim)#debg

		outgoing = transitions[victim]

		# A self-loop with condition x is folded into the outgoing
		# transitions: every condition y leaving the state becomes x*y
		if victim in outgoing:
			loop = outgoing.pop(victim)
			for target in outgoing:
				outgoing[target] = concat(star(loop), outgoing[target])

		print(); prettyprint(nfa)#debg

		# Bypass the state: for every A with an A→victim transition,
		# replace it by A→B transitions for each victim→B transition.
		# With the self-loop gone, this leaves nothing leading into
		# the victim.
		for source, row in transitions.items():
			if victim not in row:
				continue

			# Condition foo on A→victim followed by bar on victim→B
			# gives condition foobar on A→B
			incoming = row.pop(victim)
			bypass = {target: concat(incoming, onward) for target, onward in outgoing.items()}

			# Merge into A's row; an existing A→B condition foo and
			# a new one bar combine into foo|bar
			for target, condition in bypass.items():
				if target in row:
					row[target] = bar(row[target], condition)
				else:
					row[target] = condition

		# Nothing refers to the victim any more, so drop its row
		del transitions[victim]

		print(); prettyprint(nfa)#debg

	return NFA(start, accept, transitions)

def to_regex(nfa):
	"""Convert an NFA into a regex by eliminating its states.

	The caller's transition dicts are not modified; all rewriting is done
	on a copy made with copy_nfa(). The intermediate tables are printed
	as debug output.

	Returns the condition on the start→accept transition that remains
	once remove_states() has eliminated every other state.

	Raises KeyError if no accept state can be reached from the start
	state, since no start→accept transition exists in that case.
	"""
	# Rewrite the NFA so that there are no transitions leading in to the
	# start state or any leading out of an accept state. The easy way to
	# do this is by creating a new start state that leads to the old one
	# with empty condition (i.e. it consumes no input), and creating a new
	# accept state that has similar empty condition transitions from all
	# the old ones. Since we have an NFA and not a DFA, that operation is
	# safe
	#
	# As a bonus, this rewrite gives us two useful properties:
	# a) There is exactly one start state and one accept state
	# b) After running remove_states() the only transition left between
	#    them is that of start to accept

	# Fresh sentinel states, distinct from every state in the input NFA
	class _(enum.Enum):
		start = 0
		end = 1

	start, accept, transitions = copy_nfa(nfa)

	# Add new start state
	transitions[_.start] = {start: lit('')}

	# Add new accept state and transitions to it
	transitions[_.end] = {}
	for state in accept:
		# An accept state without outgoing transitions may have no row
		# of its own in the table, so create one on demand instead of
		# failing with KeyError
		transitions.setdefault(state, {})[_.end] = lit('')

	# Package everything into a new NFA
	nfa = NFA(_.start, [_.end], transitions)

	print();prettyprint(nfa)#debg

	processed = remove_states(nfa)

	return processed.transitions[_.start][_.end]

def prettyprint(nfa):
	"""Print the NFA's transition table to stdout as tab-separated rows.

	The first line lists the states that have a row in the table; each
	following line shows one state's conditions towards each of them. The
	start state is shown in bold, accept states in green, and a missing
	transition as a grey '-'.
	"""
	start, accept, transitions = nfa

	def label(state):
		# Collect the ANSI codes that apply to this state, if any
		codes = ''
		if state == start:
			# Bold
			codes += '\x1b[1m'
		if state in accept:
			# Green
			codes += '\x1b[32m'

		text = str(state)
		# Only emit the reset code when something was actually switched on
		return (codes + text + '\x1b[0m') if codes != '' else text

	states = transitions.keys()

	# Header row, shifted one column to leave room for the row labels
	print('\t' + '\t'.join(label(state) for state in states))

	for source in states:
		row = transitions[source]
		cells = [
			str(row[target]) if target in row else '\x1b[90m-\x1b[0m'
			for target in states
		]
		print(label(source) + '\t' + '\t'.join(cells))

def main():
	"""Demo: build a small example NFA, print its table, then its regex."""
	# Row order matters for the printed table, so keep it as written
	table = {
		'start': {'0': lit('s')},
		'0': {'0': lit('0'), '1': lit('1'), 'end': lit('e'), 'start': lit('r')},
		'1': {'0': lit('1'), '1': lit('0'), 'start': lit('r')},
		'end': {'end': lit('e'), 'start': lit('n')}
	}
	example = NFA('start', ['end'], table)

	prettyprint(example)

	regex = to_regex(example)
	print(regex)

# Run the demo only when executed as a script, not when imported
if __name__ == '__main__':
	main()