diff --git a/hw1/fsmutils.py b/hw1/fsmutils.py index 25a9bd0..12af21d 100755 --- a/hw1/fsmutils.py +++ b/hw1/fsmutils.py @@ -13,7 +13,7 @@ def compose(input, *fsts): return output_list if __name__ == '__main__': - f1 = FST('soundex-generate') + f1 = FST('test-generate') # Indicate that '1' is the initial state f1.add_state('start') @@ -28,7 +28,7 @@ def compose(input, *fsts): f1.add_arc('start', 'next', letter, '1') f1.add_arc('next', 'next', letter, '0') - f2 = FST('soundex-generate') + f2 = FST('test-generate') f2.add_state('start') f2.add_state('next') f2.initial_state = 'start' diff --git a/hw1/fst.py b/hw1/fst.py index e046b68..a05b859 100644 --- a/hw1/fst.py +++ b/hw1/fst.py @@ -140,7 +140,7 @@ class FST(object): considered to encode an empty mapping. I.e., transducing any string with such an C{FST} will result in failure. """ - def __init__(self, label): + def __init__(self, label='default'): """ Create a new finite state transducer, containing no states. """ @@ -938,7 +938,8 @@ def step_transduce_subsequential(self, input, step=True): def transduce(self, input): """Transduce the input through the FST - This does not support epsilon input + This does not support epsilon input. + But the epsilon output can be represented as a empty string """ input = tuple(input) output_list = [] @@ -955,16 +956,17 @@ def transduce(self, input): arcs = self.outgoing(state) for arc in arcs: in_string = self.in_string(arc) # a tuple - if tuple(input[in_pos]) == in_string: + if len(in_string) == 0 or tuple(input[in_pos]) == in_string: frontier.append( (arc, in_pos, len(output)) ) if len(frontier) == 0: break arc, in_pos, out_pos = frontier.pop() state = self.dst(arc) assert out_pos <= len(output) - in_pos = in_pos + 1 + if len(self.in_string(arc)) > 0: + in_pos = in_pos + 1 output = output[:out_pos] - # Convert character tuple back into string + # Convert character tuple back into string output.append(''.join(self.out_string(arc))) return output_list