Skip to content

Instantly share code, notes, and snippets.

@andmatand
Created March 2, 2016 00:59
Show Gist options
  • Save andmatand/9739d494227e8e8109f9 to your computer and use it in GitHub Desktop.
Save andmatand/9739d494227e8e8109f9 to your computer and use it in GitHub Desktop.
import random
class MarkovChainGenerator(object):
    """Generate pseudo-random text with a second-order Markov chain.

    The source text is split on whitespace.  Every pair of consecutive
    words is mapped to the list of words observed immediately after that
    pair, and text is generated by repeatedly sampling from those lists.

    Attributes:
        words: All whitespace-separated tokens of the source, in order.
        database: Dict mapping ``(word1, word2)`` to the list of words seen
            right after that pair.  Duplicates are kept so that
            ``random.choice`` is weighted by frequency.
        capitalizedWords: List of ``(word1, word2)`` keys of ``database``
            whose first word starts with an uppercase letter.  Used as
            starting points for generated text.
    """

    def __init__(self, fileHandle):
        """Read every word from ``fileHandle`` and build the lookup tables.

        Args:
            fileHandle: File-like object supporting ``seek(0)`` and
                ``read()``; ``read()`` must return something with a
                ``split()`` method (e.g. text).
        """
        self.words = self.read_words(fileHandle)
        self.create_database()

    @staticmethod
    def read_words(fileHandle):
        """Return the whitespace-separated words of ``fileHandle``.

        The handle is rewound first, so the whole content is read no
        matter where the current position is.
        """
        fileHandle.seek(0)
        return fileHandle.read().split()

    def get_next_triplet(self):
        """Yield each run of three consecutive words as a tuple.

        Nothing is yielded when there are fewer than three words.
        """
        words = self.words
        for i in range(len(words) - 2):
            yield (words[i], words[i + 1], words[i + 2])

    def create_database(self):
        """Build ``self.database`` and ``self.capitalizedWords``.

        Both tables are filled from the same triplet scan, so every pair
        stored in ``capitalizedWords`` is guaranteed to be a key of
        ``database`` (i.e. it always has at least one successor).
        """
        self.database = {}
        self.capitalizedWords = []
        for w1, w2, w3 in self.get_next_triplet():
            key = (w1, w2)
            self.database.setdefault(key, []).append(w3)
            # isupper() is False for digits and punctuation, unlike
            # comparing the character against its own upper() form.
            if w1[:1].isupper():
                self.capitalizedWords.append(key)

    def generate_text(self, length):
        """Return ``length`` generated words joined by single spaces.

        Generation starts at a random pair whose first word is
        capitalized; if the source has no such pair, any known pair is
        used.  When the chain reaches a pair with no recorded successor
        (the final pair of the source), the pending word is emitted and
        generation continues from a fresh random starting pair.

        Args:
            length: Number of words to produce.  Values <= 0 give ``''``.

        Returns:
            The generated text.  If the source has fewer than three words
            no chain exists, so the first ``length`` source words are
            returned as-is.
        """
        if length <= 0:
            return ''
        if not self.database:
            return ' '.join(self.words[:length])

        # Every seed is a key of self.database, so the first lookup after
        # (re)seeding can never hit a dead end.
        seeds = self.capitalizedWords or list(self.database)
        w1, w2 = random.choice(seeds)

        generatedWords = []
        while len(generatedWords) < length:
            generatedWords.append(w1)
            followers = self.database.get((w1, w2))
            if followers:
                w1, w2 = w2, random.choice(followers)
            else:
                # Dead end: flush the second word of the pair (if there
                # is still room) and restart from a new seed.
                if len(generatedWords) < length:
                    generatedWords.append(w2)
                w1, w2 = random.choice(seeds)

        return ' '.join(generatedWords)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment