Skip to content

Instantly share code, notes, and snippets.

@3ki5tj
Created November 18, 2014 23:26
Show Gist options
  • Select an option

  • Save 3ki5tj/c31535f7f897c3f8b098 to your computer and use it in GitHub Desktop.

Select an option

Save 3ki5tj/c31535f7f897c3f8b098 to your computer and use it in GitHub Desktop.
Markov-chain text generator
#!/usr/bin/env python
''' Markov-chain text generator '''
import random, re, sys
from collections import deque
# number of successive words that make up one Markov-chain prefix (key)
order = 2
# default input text; replaced by the first command-line argument when given
fnin = "advshl.txt" # input text
def add(w, pfx, dic):
    ''' extend the Markov chain `dic' with the word `w' that follows `pfx'

    w:   the word observed right after the words currently in `pfx'
    pfx: deque holding the preceding words; it is updated in place so
         that it ends with `w' and is ready for the next call
    dic: dictionary mapping a prefix key to the list of words seen after
         it; repeated words are kept, so frequent followers are
         proportionally more likely to be drawn later
    '''
    # join the words in `pfx' by `#' to a string and use it as the key
    # e.g., ["hello", "world"] --> "hello#world"
    key = '#'.join(pfx)
    # create the follower list on first sight of the key, then record `w'
    dic.setdefault(key, []).append(w)
    # slide the window by one word; an empty prefix (order 0) has nothing
    # to drop and must stay empty, so it is left untouched
    if pfx:
        pfx.popleft() # remove the foremost word
        pfx.append(w) # append the new word
def gen(n, dic):
    ''' use the Markov chain `dic' to print a random text of at most `n' words

    n:   maximum number of words to generate
    dic: dictionary mapping a `#'-joined prefix of `order' words to the
         list of words that may follow it

    Generation starts from a prefix of `order' blanks and stops early
    when the end marker " " is drawn, or when the current prefix has no
    recorded follower.  The words are printed on one line separated by
    single spaces; nothing is printed if no word was generated.
    '''
    pfx = deque([" "] * order) # start from the all-blank prefix
    words = []
    for _ in range(n):
        # form a key from the words; an unknown prefix or an empty
        # follower list ends the text instead of raising an exception
        followers = dic.get('#'.join(pfx))
        if not followers:
            break
        w = random.choice(followers)
        if w == " ": # the end marker was drawn
            break
        words.append(w)
        if pfx: # an empty prefix (order 0) stays empty
            pfx.popleft() # remove the foremost word
            pfx.append(w) # append the new word `w'
    # a single-argument print() call behaves the same on Python 2 and 3
    if words:
        print(' '.join(words))
if __name__ == "__main__":
    # the first command-line argument, if given, replaces the default input file
    if len(sys.argv) > 1:
        fnin = sys.argv[1]

    # read the whole input; `with' closes the file even if reading fails
    with open(fnin) as fp:
        inp = fp.read()

    # `#' is the separator used in the chain keys, so it must not occur in words
    inp = re.sub('[#"]', "", inp) # remove any #, ", etc
    # raw string, so that `\d' reaches the regex engine unchanged
    inp = re.sub(r"\d+:\d+", "", inp) # remove 12:1

    prefix = deque([" "] * order) # a sequence of successive words
    dic = {} # a dictionary that maps `prefix' into a list of possible follow-up strings

    # build the Markov chain by streaming in words one by one
    for s in inp.split():
        add(s, prefix, dic)
    add(" ", prefix, dic) # mark the ending

    gen(500, dic)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment