# regexen_nfae/nfa_to_regex.py
#
# 166 lines
# 4.9 KiB
# Python

import enum
from collections import namedtuple
from regex import lit, concat, bar, star
# An NFA as used in this module:
#   start       -- the single start state
#   accept      -- a collection of accepting states (membership-tested)
#   transitions -- dict mapping a state to a dict of {target state: condition}
NFA = namedtuple('NFA', ('start', 'accept', 'transitions'))
def copy_nfa(nfa):
    """Return a copy of *nfa* whose transition table can be edited freely.

    The outer table and every per-state row are fresh dicts (each row is
    duplicated with ``.copy()``). The start state, the accept collection and
    the condition objects stored in the rows are shared with the original.
    """
    duplicated_rows = {
        source: row.copy() for source, row in nfa.transitions.items()
    }
    return NFA(nfa.start, nfa.accept, duplicated_rows)
def remove_states(nfa):
    """Eliminate every state that is neither the start nor an accept state.

    Only states that have a row in the transition table are considered.
    Each eliminated state is bypassed: its self-loop (if any) is folded into
    its outgoing conditions with ``star``, every path A -> state -> B is
    replaced by a direct A -> B transition built with ``concat``, and a
    direct transition that already exists is merged with ``bar``.

    The transition table of *nfa* is modified in place; the returned NFA
    wraps that same table. Progress is printed as debug output.
    """
    start, accept, transitions = nfa

    pending = [s for s in transitions.keys() if not (s == start or s in accept)]

    while pending:
        # Work on one state per round, taken from the end of the list
        victim = pending.pop()
        print('\nRemoving state:', victim)  # debug

        outgoing = transitions[victim]

        # A self-loop "x" turns every outgoing condition "c" into "(x)*c",
        # after which the loop itself is no longer needed
        if victim in outgoing:
            loop_condition = outgoing.pop(victim)
            for target in outgoing:
                outgoing[target] = concat(star(loop_condition), outgoing[target])

            # NOTE(review): indentation was lost in this file; this debug
            # dump is assumed to belong to the self-loop branch -- confirm.
            print()
            prettyprint(nfa)  # debug

        # Bypass the state: A -> victim (foo) followed by victim -> B (bar)
        # becomes A -> B with condition foobar. The self-loop is gone by
        # now, so afterwards nothing leads into the victim any more.
        for source, edges in transitions.items():
            if victim not in edges:
                continue

            inbound = edges.pop(victim)
            bypasses = {
                target: concat(inbound, outbound)
                for target, outbound in outgoing.items()
            }

            # A bypass may end where a direct transition already exists;
            # in that case either route is acceptable: foo|bar
            for target, via_victim in bypasses.items():
                if target in edges:
                    edges[target] = bar(edges[target], via_victim)
                else:
                    edges[target] = via_victim

        # Nothing references the state any more, so drop its row
        del transitions[victim]

        print()
        prettyprint(nfa)  # debug

    return NFA(start, accept, transitions)
def to_regex(nfa):
    """Convert *nfa* into a single regex by eliminating its states.

    The NFA is first rewritten so that nothing leads into the start state
    and nothing leads out of an accept state: a new start state is added
    that leads to the old one with an empty condition (it consumes no
    input), and a new accept state is added that every old accept state
    leads to with an empty condition. Since this is an NFA and not a DFA,
    that rewrite is safe.

    The rewrite also guarantees that
      a) there is exactly one start state and one accept state, and
      b) after remove_states() the only transition left that matters is
         the one from the new start to the new accept state.

    The work is done on a copy made with copy_nfa(), so the transition
    table of the argument is not modified. Debug output is printed.

    Returns the condition on the new-start -> new-accept transition.

    Raises KeyError if elimination leaves no such transition, e.g. when
    no accept state can be reached from the start state.
    """
    # Fresh sentinel states. They are members of an Enum created for this
    # call, so they do not clash with ordinary state names such as strings.
    class _(enum.Enum):
        start, end = range(2)

    start, accept, transitions = copy_nfa(nfa)

    # Add new start state
    transitions[_.start] = {start: lit('')}

    # Add new accept state and transitions to it.
    # An accept state without outgoing transitions may have no row in the
    # table at all; create the row on demand instead of raising KeyError.
    transitions[_.end] = {}
    for state in accept:
        transitions.setdefault(state, {})[_.end] = lit('')

    # Package everything into a new NFA
    nfa = NFA(_.start, [_.end], transitions)
    print()
    prettyprint(nfa)  # debug

    processed = remove_states(nfa)
    return processed.transitions[_.start][_.end]
def prettyprint(nfa):
    """Print the transition table of *nfa* as a tab-separated grid.

    The header row and the first column list the states that have a row in
    the transition table. The start state is shown in bold and accept
    states in green (ANSI escape codes). Each cell holds ``str()`` of the
    condition for that from/to pair, or a grey ``-`` when there is none.
    """
    start, accept, transitions = nfa

    def decorate(state):
        # Collect the ANSI prefixes that apply; reset only if any was used
        prefix = ('\x1b[1m' if state == start else '') \
            + ('\x1b[32m' if state in accept else '')
        label = str(state)
        return prefix + label + '\x1b[0m' if prefix != '' else label

    states = transitions.keys()

    # Header row: empty corner cell followed by one column per state
    print('\t' + '\t'.join(decorate(state) for state in states))

    for source in states:
        edges = transitions[source]
        cells = [
            str(edges[target]) if target in edges else '\x1b[90m-\x1b[0m'
            for target in states
        ]
        print(decorate(source) + '\t' + '\t'.join(cells))
def main():
    """Print a small example NFA and the regex derived from it."""
    table = {
        'start': {'0': lit('s')},
        '0': {'0': lit('0'), '1': lit('1'), 'end': lit('e'), 'start': lit('r')},
        '1': {'0': lit('1'), '1': lit('0'), 'start': lit('r')},
        'end': {'end': lit('e'), 'start': lit('n')},
    }
    example = NFA(start='start', accept=['end'], transitions=table)

    prettyprint(example)
    regex = to_regex(example)
    print(regex)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()