Created
October 27, 2015 13:00
-
-
Save OzTamir/63605a7b3203e36f476e to your computer and use it in GitHub Desktop.
Markov Chains in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
class Markov(object):
    '''
    Second-order Markov chain text generator.

    Every pair of consecutive words in the source text is mapped to the list
    of words that directly follow that pair somewhere in the text. Calling
    the instance walks that mapping at random to produce new text.
    '''

    def __init__(self, file):
        '''
        Read all the words from `file` and build the transition table.

        file: an open file-like object supporting seek() and read(). It is
              rewound and read in full; it is not closed.
        '''
        # Maps (word, next_word) -> list of every word seen after that pair.
        # Repeats are kept so random.choice() is weighted by frequency.
        self.cache = {}
        self.file = file
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def file_to_words(self):
        '''
        Get all the words in the file

        Rewinds the file, reads it in full and returns the list of
        whitespace-separated words (an empty list for an empty file).
        '''
        self.file.seek(0)
        return self.file.read().split()

    def generate_triplets(self):
        '''
        Generate triplets of words from sentences
        Example: "Hey How Are You" Will yield
        [("Hey", "How", "Are"), ("How", "Are", "You")]

        Yields nothing when there are fewer than three words.
        '''
        # range() instead of the Python 2-only xrange(); a non-positive
        # count gives an empty range, so short texts need no special case.
        for i in range(self.word_size - 2):
            yield (self.words[i], self.words[i + 1], self.words[i + 2])

    def database(self):
        '''
        Create a cache of all the words

        For each triplet (w1, w2, w3) the word w3 is appended to the list
        stored in self.cache under the key (w1, w2).
        '''
        for w1, w2, w3 in self.generate_triplets():
            self.cache.setdefault((w1, w2), []).append(w3)

    def __call__(self, length=25):
        '''
        Generate a random text using markov chains

        length: maximum number of words in the generated text.

        Returns the generated words joined by single spaces. The text
        starts at a randomly chosen word beginning with an uppercase letter
        when one is available, otherwise at any word. It can be shorter
        than `length` when the walk reaches a word pair that has no
        recorded follower (the last two words of the source text).
        Returns '' when `length` is zero or negative.

        Raises ValueError if the source text has fewer than three words.
        '''
        if self.word_size < 3:
            raise ValueError(
                'at least three words are needed to generate text')
        if length <= 0:
            return ''

        # Seeds stop two words before the end so the starting pair always
        # has at least one follower in the cache.
        last_seed = self.word_size - 3
        seed = random.randint(0, last_seed)
        # Only insist on a capitalised start when one exists; otherwise the
        # retry loop below would never terminate.
        if any(self.words[i][0].isupper() for i in range(last_seed + 1)):
            while not self.words[seed][0].isupper():
                seed = random.randint(0, last_seed)

        # Slicing handles length == 1, where only the seed word is wanted.
        result = [self.words[seed], self.words[seed + 1]][:length]
        while len(result) < length:
            followers = self.cache.get((result[-2], result[-1]))
            if not followers:
                # Dead end: this pair only occurs at the very end of the
                # source text, so there is nothing to continue with.
                break
            result.append(random.choice(followers))
        return ' '.join(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment