Created
March 2, 2016 00:59
-
-
Save andmatand/9739d494227e8e8109f9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class MarkovChainGenerator(object):
    """Generate random text with a second-order (word-pair) Markov chain.

    The corpus is read once from a file-like object.  Every pair of
    consecutive words is mapped to the list of words that followed that
    pair somewhere in the corpus; text is generated by repeatedly picking
    a random follower of the current pair.

    Attributes:
        words: The corpus as a list of whitespace-separated words.
        database: Dict mapping ``(word1, word2)`` to the list of words seen
            directly after that pair (duplicates kept, so frequent
            followers are proportionally more likely to be chosen).
        capitalizedWords: List of ``(word, next_word)`` pairs whose first
            word starts with an uppercase letter; used as starting points.
    """

    def __init__(self, fileHandle):
        """Read the corpus from *fileHandle* and build the lookup tables.

        Args:
            fileHandle: A seekable, readable text file object.
        """
        self.words = self.read_words(fileHandle)
        self.create_database()

    @staticmethod
    def read_words(fileHandle):
        """Return every whitespace-separated word in *fileHandle*.

        The handle is rewound first, so the whole file is read regardless
        of its current position.
        """
        fileHandle.seek(0)
        return fileHandle.read().split()

    def get_next_triplet(self):
        """Yield each run of three consecutive words as a tuple.

        Yields nothing when the corpus holds fewer than three words.
        """
        words = self.words
        for triplet in zip(words, words[1:], words[2:]):
            yield triplet

    def create_database(self):
        """(Re)build ``self.database`` and ``self.capitalizedWords``."""
        self.database = {}
        self.capitalizedWords = []
        for w1, w2, w3 in self.get_next_triplet():
            self.database.setdefault((w1, w2), []).append(w3)
            # Collect seeds from the same triplets that populate the
            # database, so every seed pair is guaranteed to be a key.
            # isupper() (rather than comparing against .upper()) keeps
            # digits and punctuation from counting as "capitalized".
            if w1[:1].isupper():
                self.capitalizedWords.append((w1, w2))

    def generate_text(self, length):
        """Return *length* randomly generated words joined by single spaces.

        Generation starts at a random pair whose first word is capitalized,
        falling back to any known pair when the corpus has no capitalized
        words.  When the chain reaches a pair with no recorded follower
        (the final pair of the corpus), it restarts from a fresh random
        seed instead of failing.

        Args:
            length: Number of words to produce; values <= 0 give ``''``.

        Returns:
            The generated text as a single string.

        Raises:
            ValueError: If the corpus has fewer than three words, so no
                word pair has a follower.
        """
        seeds = self.capitalizedWords or list(self.database)
        if not seeds:
            raise ValueError(
                'corpus must contain at least three words to generate text')

        # random.choice considers every seed; the previous
        # randint(0, len - 2) skipped the last one and failed for len < 2.
        w1, w2 = random.choice(seeds)

        generatedWords = []
        while len(generatedWords) < length:
            generatedWords.append(w1)
            followers = self.database.get((w1, w2))
            if followers:
                w1, w2 = w2, random.choice(followers)
            else:
                # Dead end: emit the pending second word (if there is
                # still room), then restart from a new seed.
                if len(generatedWords) < length:
                    generatedWords.append(w2)
                w1, w2 = random.choice(seeds)

        # NOTE: an earlier, disabled variant formatted Shakespeare dialogue
        # here by putting ALL-CAPS words (speaker names) on their own lines.
        return ' '.join(generatedWords)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment