Created
October 15, 2012 15:41
-
-
Save bobmurder/3893172 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Markov chain string generator: builds two-word-prefix chains from a text
file and prints a randomly generated string.
"""
from collections import defaultdict | |
import itertools | |
import random | |
import sys | |
# End-of-text marker appended after the last word of the corpus; the chain
# builder and the generator both stop when they encounter it.
sentinel = '\n'
def file_to_list(fname):
    """Return the whitespace-separated words of the file *fname*, in order.

    fname -- path of the text file to read.

    Returns a list of strings; an empty or all-whitespace file gives [].
    """
    with open(fname) as f:
        # str.split() with no argument splits on any run of whitespace,
        # newlines included, so stripping and re-joining lines is unnecessary.
        return f.read().split()
# The input path is taken from the first command-line argument; the name
# `fname` was previously used here without ever being defined.
if len(sys.argv) < 2:
    sys.exit('usage: %s FILE' % sys.argv[0])
fname = sys.argv[1]
# add sentinel value as a single trailing element marking the end of the text
words = file_to_list(fname) + [sentinel]
def triplets(words):
    """Yield ((first, second), third) for each run of three consecutive words.

    words -- a sequence (it is sliced) of words, normally ending with the
             module-level sentinel.

    Iteration stops as soon as the sentinel shows up in the two-word prefix.
    Input that is too short to form a triplet, or that lacks a sentinel,
    simply produces fewer (possibly zero) items instead of raising
    IndexError.
    """
    # zip() stops at the shortest slice, so the window never runs off the end.
    for first, second, third in zip(words, words[1:], words[2:]):
        if sentinel in (first, second):
            break
        yield ((first, second), third)
# NOTE: this rebinds the name `triplets` from the generator function to the
# generator object it returns, so the function cannot be called again later.
triplets = triplets(words)
def make_chains(triplets):
    """Group suffixes by prefix.

    triplets -- iterable of (prefix, suffix) pairs.

    Returns a defaultdict(list) mapping each prefix to the list of suffixes
    seen after it, in encounter order and with repeats kept.
    """
    table = defaultdict(list)
    for key, follower in triplets:
        table[key].append(follower)
    return table
# Prefix -> suffix-list table for the corpus; building it consumes the
# triplet stream bound to `triplets`.
chains = make_chains(triplets)
def generate(words, chains, prefix, max_length=10000):
    """Yield randomly chosen words by walking *chains* from *prefix*.

    words      -- unused; kept so existing callers keep working.
    chains     -- mapping of (word, word) prefixes to lists of suffixes.
    prefix     -- two-item tuple of words to start from.
    max_length -- upper bound on the number of words yielded.

    The walk ends when the prefix contains the sentinel (the sentinel itself
    is yielded as the final word), when the prefix has no recorded suffixes,
    or after max_length words.
    """
    for _ in range(max_length):
        if sentinel in prefix:
            break
        # .get() avoids inserting an empty entry into a defaultdict and lets
        # an unknown prefix end the walk instead of crashing random.choice.
        candidates = chains.get(prefix)
        if not candidates:
            break
        word = random.choice(candidates)
        yield word
        prefix = (prefix[1], word)
if __name__ == '__main__':
    # Seed the walk with the first two words of the corpus.
    prefix = tuple(words[:2])
    # The output starts with the sentinel followed by the seed prefix.
    output = [sentinel] + list(prefix)
    output.extend(generate(words, chains, prefix))
    # A parenthesised single-argument print works on both Python 2 and 3.
    print(' '.join(output))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment