Remove empty literals when concatenating regexes

Create nfa.py, by analogy of regex.py
2019-05-31 14:15:28 +03:00 · 2019-05-31 14:14:52 +03:00
3 changed files with 30 additions and 18 deletions
--- a/nfa.py
+++ b/nfa.py
@@ -0,0 +1,10 @@
 from collections import namedtuple
 NFA = namedtuple('NFA', ['start', 'accept', 'transitions'])
 def copy_nfa(nfa):
 	"""Return a new NFA whose transition table is copied from nfa."""
 	# A fresh outer dict is built and each state's inner mapping is
 	# duplicated with .copy(); start and accept are passed through as-is.
 	duplicated = {
 		state: nfa.transitions[state].copy()
 		for state in nfa.transitions
 	}
 	return NFA(nfa.start, nfa.accept, duplicated)
--- a/nfa_to_regex.py
+++ b/nfa_to_regex.py
@@ -1,16 +1,7 @@
 import enum
 from collections import namedtuple
 from regex import lit, concat, bar, star
-
+from nfa import NFA, copy_nfa
 NFA = namedtuple('NFA', ['start', 'accept', 'transitions'])
 # NOTE(review): copy_nfa is also imported from nfa earlier in this hunk, and
 # the hunk header (16 old lines -> 7 new lines) suggests this local definition
 # is removed by the change; the '-' markers may have been lost in this
 # rendering. TODO confirm against the original diff.
 def copy_nfa(nfa):
 	# Build a new outer dict and copy each state's inner mapping with .copy();
 	# start and accept are reused unchanged in the returned NFA.
 	transitions_copy = {}
 	for from_state in nfa.transitions:
 		transitions_copy[from_state] = nfa.transitions[from_state].copy()
 	return NFA(nfa.start, nfa.accept, transitions_copy)
 def remove_states(nfa):
 	start, accept, transitions = nfa
@@ -150,16 +141,19 @@ def prettyprint(nfa):
 		print(process_state(from_state) + '\t' + '\t'.join(t))
 def main():
 	# Demo entry point: builds a hard-coded sample NFA, pretty-prints it,
 	# converts it with to_regex, and prints the result via repr() and str().
 	# NFA arguments are (start, accept, transitions); the inner dict keys are
 	# presumably destination states and the values regex labels — TODO confirm.
-	nfa = NFA('start', ['end'], {
+	nfa = NFA('start', ['0'], {
-		'start': {'0': lit('s')},
+		'start': {'1': lit('i'), '2': lit('d')},
-		'0': {'0': lit('0'), '1': lit('1'), 'end': lit('e'), 'start': lit('r')},
+		'0': {'1': lit('i'), '2': lit('d')},
-		'1': {'0': lit('1'), '1': lit('0'), 'start': lit('r')},
+		'1': {'0': lit('d'), '2': lit('i')},
-		'end': {'end': lit('e'), 'start': lit('n')}
+		'2': {'0': lit('i'), '1': lit('d')}
 	})
 	prettyprint(nfa)
-	print(to_regex(nfa))
+	regex = to_regex(nfa)
 	# The converted regex is kept in a variable so both forms can be printed.
 	print(repr(regex))
 	print(regex)
 if __name__ == '__main__':
 	main()
--- a/regex.py
+++ b/regex.py
@@ -81,7 +81,11 @@ def concat(*elements):
 	combined = []
 	for element in flattened:
-		if len(combined) > 0 and type(combined[-1]) == Literal and type(element) == Literal:
+		if type(element) == Literal and element.text == '':
 			# Drop empty literals
 			continue
 		elif len(combined) > 0 and type(combined[-1]) == Literal and type(element) == Literal:
 			# Combine two literals next to each other
 			# into one literal
 			previous = combined.pop()
@@ -90,7 +94,11 @@ def concat(*elements):
 		else:
 			combined.append(element)
-	if len(combined) == 1:
+	if len(combined) == 0:
 		# Empty regex, represent with empty literal
 		return lit('')
 	elif len(combined) == 1:
 		element, = combined
 		return element
 	else:
Author	SHA1	Message	Date
Juhani Krekelä	46aab39ee6	Remove empty literals when concatenating regexes	2019-05-31 14:15:28 +03:00
Juhani Krekelä	8e052ddd97	Create nfa.py, by analogy of regex.py	2019-05-31 14:14:52 +03:00