from __future__ import division import random import sys import re from math import floor from math import ceil from collections import defaultdict _tokenize = re.compile(r'((?:\w{1,20}\'\w+)|(?:\w{1,20})|(?:[.,]))') def ngrams(tokens): print tokens n_tokens = len(tokens) for i in xrange(n_tokens): for j in xrange(i + 2, min(n_tokens, i + 6) + 1): print tuple(tokens[i:j]) yield tuple(tokens[i:j]) def tokenize(s): return _tokenize.findall(s) splits = { 6: 3, 5: 3, 4: 2, 3: 2, 2: 1} class Markov(object): def __init__(self): self.grams = defaultdict(list) def add_text(self, s): for gram in ngrams(tokenize(s)): split = splits[len(gram)] self.grams[gram[:split]].append(gram[split:]) def get_next(self, current): possibilities = [] for gram_size in range(3): test_gram = current[:-gram_size] if gram_size > 0 else current if test_gram in self.grams: possibilities.extend(self.grams[test_gram]) for gram in self.grams[test_gram]: if random.random() > .2: return gram if not possibilities: return None next_part = random.choice(possibilities) return next_part def make_ngram(self, seed=None, max_len=20): if seed is None: part = random.choice(self.grams.keys()) poem = [] while '.' not in part: poem.extend(part) part = self.get_next(tuple(poem[-3:])) if part is None: break if part: poem.extend(part) if '.' in poem: return poem[:poem.index('.')][:max_len] return poem[:max_len] def main(): text = sys.stdin.read() m = Markov() for line in text.splitlines(): m.add_text(line) print ' '.join(m.make_ngram()) if __name__ == '__main__': main()