Last active
April 25, 2016 18:22
-
-
Save andyhd/2b2d15bef0085bca82017e3a0f3da656 to your computer and use it in GitHub Desktop.
Code golf with Markov string generation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
def build_lookup(corpus, key_size=2):
    """Map every run of ``key_size`` consecutive words to its successors.

    ``corpus`` is an indexable, sliceable sequence of words.  The returned
    dict is keyed by tuples of ``key_size`` adjacent words; each value is the
    list of words seen immediately after that run, in corpus order and with
    repeats kept, so that a uniform pick from the list follows the observed
    frequencies.
    """
    total = len(corpus)
    table = {}
    for start in range(total):
        follower_at = start + key_size
        if follower_at >= total:
            # No word follows this window, so there is nothing to record.
            continue
        prefix = tuple(corpus[start:follower_at])
        if prefix not in table:
            table[prefix] = []
        table[prefix].append(corpus[follower_at])
    return table
def generate_text(lookup, should_end=lambda words: False):
    """Generate a space-joined string by randomly walking ``lookup``.

    ``lookup`` maps tuples of words to lists of possible next words (the
    shape produced by ``build_lookup``).  The walk starts from a randomly
    chosen key whose first word begins with an uppercase letter; if no such
    key exists, any key may be chosen.  An empty ``lookup`` yields ``''``.

    ``should_end`` is called with the list of words generated so far before
    each new word is appended; generation stops as soon as it returns a true
    value, or when the current trailing words have no recorded successor.
    The default predicate never returns true, so with the default a table
    whose chains cycle will keep generating indefinitely -- pass a bounding
    predicate in that case.
    """
    if not lookup:
        # random.choice raises IndexError on an empty sequence.
        return ''

    all_keys = list(lookup)
    # key[0][:1] (rather than key[0][0]) tolerates an empty-string first word.
    capitalised = [key for key in all_keys if key[0][:1].isupper()]
    # Prefer sentence-like starts, but fall back to any key so that an
    # all-lowercase corpus still produces output instead of crashing.
    words = list(random.choice(capitalised or all_keys))
    key_size = len(words)

    while True:
        followers = lookup.get(tuple(words[-key_size:]))
        # A missing key or an empty successor list is a dead end.
        if not followers or should_end(words):
            break
        words.append(random.choice(followers))
    return ' '.join(words)
# Script driver: read corpus.txt, split it on whitespace into words, build the
# Markov table, then print one generated passage.  The predicate asks the
# generator to stop once more than 25 words have been produced.
with open('corpus.txt') as f:
    lookup = build_lookup(f.read().split())

print(generate_text(lookup, should_end=lambda words: len(words) >= 26))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment