Created
November 18, 2014 23:26
-
-
Save 3ki5tj/c31535f7f897c3f8b098 to your computer and use it in GitHub Desktop.
Markov-chain text generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| ''' Markov-chain text generator ''' | |
| import random, re, sys | |
| from collections import deque | |
# number of preceding words that are joined to form each Markov-chain key
order = 2
# default input text file; replaced by the first command-line argument when one is given
fnin = "advshl.txt" # input text
def add(w, pfx, dic):
    ''' record `w' in the Markov chain `dic' as a follower of the prefix `pfx'

    The words currently held in `pfx' are glued together with `#' to form
    the dictionary key, e.g., ["hello", "world"] --> "hello#world".
    After `w' is recorded, the prefix window slides forward by one word:
    the oldest word is dropped and `w' becomes the newest one.
    '''
    state = '#'.join(pfx)
    # the follower list is created the first time a state is seen,
    # afterwards `w' is simply one more possibility for that state
    dic.setdefault(state, []).append(w)
    # slide the window: drop the oldest word, then take in `w'
    pfx.popleft()
    pfx.append(w)
def gen(n, dic):
    ''' use the Markov chain `dic' to write a random text of at most `n' words

    Generation starts from the all-blank prefix of `order' words (`order' is
    the module-level setting).  At each step the current prefix is joined
    with `#' into a key, and one of the followers listed in `dic' under that
    key is picked at random.

    Generation stops early when the end marker " " is drawn, or when the
    current prefix has no entry in `dic' (instead of raising KeyError).

    The words are written to standard output separated by single spaces;
    if at least one word was written, the line is ended with a newline.
    Nothing is returned.
    '''
    pfx = deque([" "] * order) # start from the blank prefix
    nwritten = 0
    for _ in range(n):
        key = '#'.join(pfx) # form a key from the words
        followers = dic.get(key)
        if not followers: # unknown prefix: nothing can follow, stop here
            break
        w = random.choice(followers)
        if w == " ": # end-of-text marker
            break
        # sys.stdout.write works under both Python 2 and 3,
        # unlike the Python-2-only statement `print w,'
        if nwritten:
            sys.stdout.write(" ")
        sys.stdout.write(w)
        nwritten += 1
        pfx.popleft() # remove the foremost word
        pfx.append(w) # append the new word `w'
    if nwritten:
        sys.stdout.write("\n") # terminate the generated line
if __name__ == "__main__":
    # read and treat input file;
    # an optional first command-line argument overrides the default file name
    if len(sys.argv) > 1:
        fnin = sys.argv[1]
    # `with' makes sure the file is closed as soon as the text has been read
    with open(fnin) as fin:
        inp = fin.read()
    # remove any # and "; `#' must not occur in a word
    # because add() and gen() use it to separate the words of a key
    inp = re.sub('[#"]', "", inp)
    # remove number pairs such as 12:1 (raw string keeps `\d' a regex escape)
    inp = re.sub(r"\d+:\d+", "", inp)

    prefix = deque([" "] * order) # a sequence of successive words
    dic = {} # a dictionary that maps `prefix' into a list of possible follow-up strings

    # build the Markov chain by streaming in words one by one
    for s in inp.split():
        add(s, prefix, dic)
    add(" ", prefix, dic) # mark the ending

    # write a random text of up to 500 words
    gen(500, dic)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment