Add regex_to_nfa.py
This commit is contained in:
parent
4a789f6b16
commit
440aedbf64
|
@@ -0,0 +1,105 @@
|
|||
from regex import Literal, Concatenation, Alternation, Star, lit, concat, bar, star
|
||||
from nfa import NFA, prettyprint
|
||||
|
||||
def to_nfa(regex):
    """Build an NFA from a regex syntax tree.

    The tree is walked recursively. Each node becomes a fragment with one
    entry state and one exit state, and fragments are glued together with
    edges labelled ``lit('')`` (presumably the empty-string / epsilon
    label -- confirm against the ``nfa`` module).

    States are consecutive integers starting at 0, handed out in the order
    the fragments are built.

    Args:
        regex: Root node of the tree: a Literal, Concatenation, Alternation
            or Star, or an instance of a subclass of one of them.

    Returns:
        ``NFA(start_state, [end_state], transitions)``, where
        ``transitions`` maps every state to a dict of
        ``{target state: edge label}``.

    Raises:
        ValueError: If a node in the tree is none of the supported types.
    """
    # state -> {target state: label on the edge to that target}
    transitions = {}
    state_name_counter = 0

    def new_state():
        """Allocate the next state number and register it with no edges."""
        # Only the counter is rebound; ``transitions`` is merely mutated and
        # therefore needs no ``nonlocal`` declaration.
        nonlocal state_name_counter

        state_name = state_name_counter
        state_name_counter += 1

        transitions[state_name] = {}

        return state_name

    def link_empty(source, target):
        """Add an edge labelled ``lit('')`` from *source* to *target*."""
        # A fresh label object is created per edge, as the original did.
        transitions[source][target] = lit('')

    def worker(node):
        """Build the fragment for *node*; return ``(entry, exit)`` states."""
        # isinstance (rather than an exact type comparison) lets subclasses
        # of the node types be handled like their base class.
        if isinstance(node, Literal):
            #            text
            # (start) ---------> (end)
            start_state = new_state()
            end_state = new_state()

            transitions[start_state][end_state] = lit(node.text)

            return (start_state, end_state)

        if isinstance(node, Concatenation):
            # (start) → […] → […] → […]
            start_state = new_state()

            prev_state = start_state
            for element in node.elements:
                inner_start, inner_end = worker(element)

                # (prev) → (inner_start) → […]
                link_empty(prev_state, inner_start)

                # The next element is linked straight from this
                # element's inner end state.
                prev_state = inner_end

            # With no elements, the start state doubles as the end state.
            return (start_state, prev_state)

        if isinstance(node, Alternation):
            #           +-> […] --+
            #           |         |
            # (start) --+-> […] --+-> (end)
            #           |         |
            #           +-> […] --+
            start_state = new_state()
            end_state = new_state()

            for element in node.elements:
                inner_start, inner_end = worker(element)

                # (start) → (inner_start) → […]
                link_empty(start_state, inner_start)

                # […] → (inner_end) → (end)
                link_empty(inner_end, end_state)

            return (start_state, end_state)

        if isinstance(node, Star):
            #    +------ […] <-----+
            #    |                 |
            #    v                 |
            # (start) -------------+--> (end)
            start_state = new_state()
            end_state = new_state()

            inner_start, inner_end = worker(node.element)

            # (start) → (inner_start) → […]
            link_empty(start_state, inner_start)

            # […] → (inner_end) → (start)
            link_empty(inner_end, start_state)

            # (start) → (end)
            link_empty(start_state, end_state)

            return (start_state, end_state)

        raise ValueError('node has to be Literal, Concatenation, Alternation, or Star')

    start_state, end_state = worker(regex)

    return NFA(start_state, [end_state], transitions)
|
||||
|
||||
def main():
    """Print a sample regex, then pretty-print the NFA built from it.

    The sample is ``concat(lit('x'), star(bar(lit('a'), lit('b'))),
    lit('y'))`` -- presumably the pattern ``x(a|b)*y``; confirm against
    the ``regex`` module.
    """
    # Pieces are built in the same left-to-right order as a single nested
    # call would evaluate them.
    head = lit('x')
    repeated = star(bar(lit('a'), lit('b')))
    tail = lit('y')

    sample = concat(head, repeated, tail)
    print(sample)

    prettyprint(to_nfa(sample))


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue