Created
July 15, 2015 01:23
-
-
Save willasaywhat/56c0cf084389af36b74d to your computer and use it in GitHub Desktop.
Famamarkov v2: The Markovening
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FamiLAB Markov Chain Generator
# based on: http://agiliq.com/blog/2009/06/generating-pseudo-random-text-with-markov-chains-u/
import random | |
import re | |
import argparse | |
# Matches one IRC log line as (prefix)<nick>(message).  The prefix and nick
# groups are non-greedy so that a '<...>' appearing inside the message text
# is not mistaken for the speaker's nick.
IRC_PATTERN = r"(.*?)<(.*?)>(.*)"
class Markov(object):
    """Order-2 Markov chain text generator fed from an IRC-style log.

    The constructor reads all of ``open_file``, keeps the message part of
    every line matching ``IRC_PATTERN`` (optionally only lines spoken by
    ``nick``), splits that text into words and builds ``self.cache``: a dict
    mapping each consecutive word pair to the list of words seen right
    after it.
    """

    def __init__(self, open_file, nick="", length=100):
        """Read the log and build the chain.

        Args:
            open_file: Readable, seekable file object holding the log text.
                It is only read here, during construction.
            nick: When non-empty, only lines whose ``<nick>`` equals this
                string contribute words.
            length: Number of words in each generated text.
        """
        self.cache = {}
        self.open_file = open_file
        self.nick = nick
        self.length = length
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def file_to_words(self):
        """Return the whitespace-separated words of the filtered log."""
        # Rewind first so a handle that was already read from still works.
        self.open_file.seek(0)
        data = self.open_file.read()
        return self.strip_irc(data).split()

    def strip_irc(self, data_str):
        """Return the message text of the matching lines of ``data_str``.

        Lines that do not match ``IRC_PATTERN`` are dropped.  When
        ``self.nick`` is non-empty, lines from other nicks are dropped too.
        Each kept message is prefixed with a single space and the pieces are
        concatenated in order.
        """
        prog = re.compile(IRC_PATTERN)
        pieces = []
        for line in data_str.split('\n'):
            result = prog.match(line)
            if result is None:
                continue
            if self.nick == "" or self.nick == result.group(2):
                pieces.append(' ' + result.group(3))
        # Joining once avoids rebuilding an ever-growing string per line.
        return ''.join(pieces)

    def triples(self):
        """Yield every run of three consecutive words.

        So if our words were "What a lovely day", this yields
        (What, a, lovely) and then (a, lovely, day).  Nothing is yielded
        when fewer than three words are available.
        """
        words = self.words
        for triple in zip(words, words[1:], words[2:]):
            yield triple

    def database(self):
        """Fill ``self.cache`` with pair -> list-of-followers entries.

        Followers are appended once per occurrence, so a word that follows
        a pair more often is proportionally more likely to be picked.
        """
        for w1, w2, w3 in self.triples():
            self.cache.setdefault((w1, w2), []).append(w3)

    def generate_markov_text(self):
        """Return ``self.length`` randomly chained words joined by spaces.

        The walk starts at a random word pair from the input and repeatedly
        picks a random recorded follower of the current pair.  If the
        current pair has no recorded follower (it only occurs at the very
        end of the input) the walk stops there, so the result may be
        shorter than ``self.length`` words.

        Raises:
            ValueError: If the input holds fewer than three words, in which
                case no chain could be built.
        """
        if self.word_size < 3:
            raise ValueError(
                "need at least 3 words of input to generate text, got %d"
                % self.word_size)
        # Seeds are limited to pairs that start a triple, so the first
        # lookup below always succeeds.
        seed = random.randint(0, self.word_size - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        gen_words = []
        while len(gen_words) < self.length:
            gen_words.append(w1)
            followers = self.cache.get((w1, w2))
            if followers is None:
                # Dead end: emit the second word of the pair if there is
                # still room, then stop instead of raising KeyError.
                if len(gen_words) < self.length:
                    gen_words.append(w2)
                break
            w1, w2 = w2, random.choice(followers)
        return ' '.join(gen_words)
def main():
    """Parse the command line, build the chain and print generated text.

    Prints the input file name, then prints one generated text per
    iteration, waiting for the user to press Enter in between.  The loop
    ends on end-of-input or Ctrl-C.
    """
    parser = argparse.ArgumentParser(description='FamiLAB Markov Bot')
    parser.add_argument('--nick', dest='nick', default="", help='Filter input by nickname')
    parser.add_argument('--file', dest='filename', required=True, help='Input log file name')
    parser.add_argument('--length', dest='length', type=int, default=100, help='Output length')
    args = parser.parse_args()
    print(args.filename)

    # Markov reads the whole file inside its constructor, so the handle can
    # be closed as soon as the object has been built.
    with open(args.filename) as file_:
        marky = Markov(file_, args.nick, args.length)

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    try:
        while True:
            print(marky.generate_markov_text())
            prompt("Press any key...")
    except (EOFError, KeyboardInterrupt):
        # Leave quietly instead of dumping a traceback on Ctrl-C / Ctrl-D.
        print("")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment