Created
July 14, 2015 23:42
-
-
Save willasaywhat/372d69c2ba762c1554fa to your computer and use it in GitHub Desktop.
Famarkov: A Markov Bot for IRC Logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FamiLAB Markov Chain Generator
# based on: http://agiliq.com/blog/2009/06/generating-pseudo-random-text-with-markov-chains-u/
import random | |
import re | |
# Regex for one IRC log line of the form "<prefix><nick> message".
# Group 1 is whatever precedes the first "<", group 2 is the text between the
# first "<" and the next ">", and group 3 is the rest of the line (the message).
# The first two groups are non-greedy so that a "<" or ">" inside the message
# itself does not swallow the start of the message.
IRC_PATTERN = r"(.*?)<(.*?)>(.*)"
class Markov(object):
    """Second-order Markov chain text generator fed from an IRC log file.

    On construction the whole file is read, every line matching
    ``IRC_PATTERN`` is reduced to its third capture group, and the resulting
    text is split into words. ``self.cache`` then maps each pair of
    consecutive words to the list of words that followed that pair.
    """

    def __init__(self, open_file):
        """Read *open_file* (a seekable, readable file object) and build the table."""
        self.cache = {}
        self.open_file = open_file
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def file_to_words(self):
        """Return the whitespace-separated words of the log's message text."""
        # Rewind first so the full file is read even if it was already consumed.
        self.open_file.seek(0)
        data = self.open_file.read()
        return self.strip_irc(data).split()

    def strip_irc(self, data_str):
        """Return the third ``IRC_PATTERN`` group of every matching line.

        Lines that do not match the pattern are dropped. Each kept fragment
        is preceded by a single space in the returned string; an input with
        no matching lines yields ``""``.
        """
        prog = re.compile(IRC_PATTERN)
        messages = []
        for line in data_str.split('\n'):
            result = prog.match(line)
            if result is not None:
                messages.append(result.group(3))
        # Joining with a leading empty element puts one space before every
        # fragment without repeated string concatenation.
        return ' '.join([''] + messages)

    def triples(self):
        """Generate triples from the word list.

        So if our words were "What a lovely day", we'd generate
        (What, a, lovely) and then (a, lovely, day). Nothing is yielded when
        there are fewer than three words.
        """
        words = self.words
        # range() of a non-positive count is empty, which covers short inputs.
        for i in range(len(words) - 2):
            yield (words[i], words[i + 1], words[i + 2])

    def database(self):
        """Record, for every word pair, each word that follows it in the corpus."""
        for w1, w2, w3 in self.triples():
            self.cache.setdefault((w1, w2), []).append(w3)

    def generate_markov_text(self, size=25):
        """Return a space-joined random walk through the chain.

        The walk starts at a random pair of adjacent corpus words and emits
        up to ``size + 1`` words. It ends early if it reaches a pair with no
        recorded follower (the pair that closes the corpus). When the corpus
        has fewer than three words there is nothing to walk, so the corpus
        words themselves are returned.
        """
        if self.word_size < 3:
            return ' '.join(self.words)

        # The last valid start index still leaves one word after the pair.
        seed = random.randint(0, self.word_size - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        gen_words = []
        for _ in range(size):
            gen_words.append(w1)
            followers = self.cache.get((w1, w2))
            if followers is None:
                # Dead end: this pair only occurs at the very end of the
                # corpus, so there is no recorded next word.
                break
            w1, w2 = w2, random.choice(followers)
        gen_words.append(w2)
        return ' '.join(gen_words)
def main(log_path='familab.log'):
    """Print 100-word Markov samples from *log_path* until the user quits.

    After each sample the user is prompted; pressing Enter produces another
    sample, while end-of-input or Ctrl-C ends the loop.
    """
    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    # Markov reads the whole file in its constructor, so the handle can be
    # closed as soon as the model is built.
    with open(log_path) as log_file:
        marky = Markov(log_file)

    while True:
        print(marky.generate_markov_text(size=100))
        try:
            prompt("Press any key...")
        except (EOFError, KeyboardInterrupt):
            break


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment