Created
August 6, 2015 19:45
-
-
Save gulan/c9aa3c74ae62ca279a8c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import sys | |
| """ | |
| This program demonstrates how to recognize and deal with backtracking | |
| while processing sequential files or sequences. | |
| A file is a sequence of runs. A run is any number of 'a', 'b' or 'x', | |
| ending with '\n'. The file ends with a special value eof. | |
| The primitive I/O operation is read(). | |
| A bad run is any that contains an 'x'. A good run is any that is not | |
| bad. | |
| file = file-body + eof | |
| file-body = run* | |
| run = run-body + eol | |
| run-body = good-run | bad-run | |
| bad-run = (a | b | x)+ # but with at least one x | |
| good-run = (a | b)* | |
| Our ideal program: | |
| while more_runs: | |
| if is_good_run: | |
| print 'good' | |
| while more_char: | |
| read() | |
| read() # eol | |
| else: # is_bad_run | |
| print 'bad' | |
| while more-char: | |
| read() | |
| read() # eol | |
| This program would work perfectly, except that it expects is_good_run | |
| to evaluate a future state of the program. The same is true with the | |
| more_* predicates too, but they look ahead only to the next state. | |
| One method to implement this ideal program is to replace the 'if | |
| is_good_run' with an oracle, and backtrack if its answer was | |
| wrong. The ideal is realized as follows in main(). | |
| """ | |
| def main(fin,fout): | |
| while fin.more_run: | |
| try: # Posit that we have good run | |
| fin.mark() | |
| fout.mark() | |
| fout.write('good\n') # See note 1. | |
| while fin.more_char: | |
| ch = fin.read() | |
| if ch not in {'a','b'}: | |
| raise Fail, 'Saw a %r' % ch | |
| assert fin.read() == '\n', 'unexpected char' | |
| fout.commit() | |
| fin.commit() | |
| except Fail: # Admit that we have a bad run. | |
| # Start over from the beginning of the run. | |
| fin.cancel() | |
| fout.cancel() | |
| fout.write('bad\n') | |
| while fin.more_char: | |
| fin.read() | |
| assert fin.read() == '\n', 'unexpected char' | |
| # No need to commit as there were no marks. | |
| # 1. I could have delayed the output until after the loop, and | |
| # would never need to retract it. But in that case I could | |
| # not demonstrate the technique. | |
| class Fail(Exception): | |
| 'Raised when the posited assumption was wrong' | |
| pass | |
| class reader(object): | |
| @property | |
| def more_run(self): return self.buf != '' | |
| @property | |
| def more_char(self): return self.buf != '\n' | |
| def _read(self): | |
| try: | |
| return self.source.next() | |
| except StopIteration: | |
| return '' | |
| def read(self): | |
| # Read-ahead buffering to permit the timely evaluation of the | |
| # more_run and more_chars predicates. | |
| ch = self.buf | |
| self.buf = self._read() | |
| return ch | |
| def __init__(self,source): | |
| self.source = iter(source) | |
| self.buf = self._read() | |
| self.stack = [] | |
| def mark(self): | |
| # May not scale, but the interface is good, and the | |
| # implementation can be changed. | |
| source = list(self.source) | |
| self.stack.append((self.buf,source)) | |
| self.source = iter(source) | |
| def cancel(self): | |
| (self.buf,source) = self.stack.pop() | |
| self.source = iter(source) | |
| def commit(self): | |
| self.stack.pop() | |
| # For a writer, 'mark' buffers the output, and 'commit' causes it to | |
| # print. Cancel discard all writes since the last mark. If the has not | |
| # been a preceding mark, the writes go directly to the output and are | |
| # permanent. | |
| # Generalizing, I note that appending to a sequence is like writing to | |
| # a file. When commit pops the buffer, it will append the most recent | |
| # writes to that buffer. When the buffer stack is empty, these | |
| # 'append' operations are actually writes to the real output file. | |
| class sink_buffer(object): | |
| # very simple string I/O | |
| def write(self,ch): | |
| self.buf.append(ch) | |
| def __iter__(self): | |
| return iter(self.buf) | |
| def __init__(self): | |
| self.buf = [] | |
| class writer(object): | |
| def write(self,ch): | |
| self.sink.write(ch) | |
| def __init__(self,sink=sys.stdout): | |
| self.sink = sink | |
| self.stack = [] | |
| def mark(self): | |
| self.stack.append(self.sink) | |
| self.sink = sink_buffer() | |
| def commit(self): | |
| lagged = self.stack.pop() | |
| # Bring the lagged buffer/printer up to date. | |
| for i in self.sink: | |
| lagged.write(i) | |
| self.sink = lagged | |
| def cancel(self): | |
| # Go back to where we were at the last mark. | |
| self.sink = self.stack.pop() | |
| # The empty run has no 'x', so it is good. | |
| # The lines in data should evaluate as good, good, bad, bad and bad. | |
| data = """abaababbb | |
| xabaababbb | |
| abaaxbabbb | |
| x | |
| abaababbbx | |
| """ | |
| main(reader(data),writer()) | |
| # I call a test a 'demo', if there is no automatic checking of the | |
| # results. | |
| def demo_reader_1(): | |
| """ | |
| expected results: | |
| a b c | |
| d e f | |
| d e f # repeated because I retracted the read, even though the output was 'committed' to stdout. | |
| g h i | |
| j k l | |
| """ | |
| sys.stderr.write('demo_reader_1\n') | |
| f = reader('abcdefghijklmnopqrstuvwxyz') | |
| print f.read(), | |
| print f.read(), | |
| print f.read() | |
| if 1: | |
| f.mark() | |
| print f.read(), | |
| print f.read(), | |
| print f.read() | |
| f.cancel() | |
| # Restart at 'd' | |
| print f.read(), | |
| print f.read(), | |
| print f.read() | |
| if 1: | |
| f.mark() | |
| print f.read(), | |
| print f.read(), | |
| print f.read() | |
| f.commit() # delayed reads made real. | |
| print f.read(), | |
| print f.read(), | |
| print f.read() | |
| def demo_reader_2(): | |
| sys.stderr.write('demo_reader_2\n') | |
| f = reader('abcdefghijklmnopqrstuvwxyz') | |
| if 1: | |
| f.mark() | |
| while f.more_run: | |
| print f.read(), | |
| f.read() # read eof | |
| f.cancel() # discard all reads | |
| # Start over from the beginning | |
| while f.more_run: | |
| print f.read(), | |
| f.read() # read eof | |
| def demo_writer_1(): | |
| sys.stderr.write('demo_writer_1, expect abc\n') | |
| p = writer(sys.stdout) | |
| p.write('a') | |
| p.write('b') | |
| p.write('c') | |
| p.write('\n') | |
| def demo_writer_2(): | |
| sys.stderr.write('demo_writer_2, expect def\n') | |
| p = writer(sys.stdout) | |
| if 1: # these next should all be discarded | |
| p.mark() | |
| p.write('a') | |
| p.write('b') | |
| p.write('c') | |
| p.write('\n') | |
| p.cancel() | |
| p.write('d') | |
| p.write('e') | |
| p.write('f') | |
| p.write('\n') | |
| def demo_writer_3(): | |
| sys.stderr.write('demo_writer_3, expect abc\n') | |
| p = writer(sys.stdout) | |
| p.mark() | |
| p.write('a') | |
| p.write('b') | |
| p.write('c') | |
| p.write('\n') | |
| if 1: # these next should all be discarded | |
| p.mark() | |
| p.write('d') | |
| p.write('e') | |
| p.write('f') | |
| p.write('\n') | |
| p.cancel() | |
| p.commit() | |
| def demo_writer(): | |
| demo_writer_1() | |
| demo_writer_2() | |
| demo_writer_3() | |
| def demo_reader(): | |
| demo_reader_1() | |
| demo_reader_2() | |
| def demo(): | |
| demo_reader() | |
| demo_writer() | |
| # demo() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment