Skip to content

Instantly share code, notes, and snippets.

@JnBrymn
Last active August 29, 2015 13:57
Show Gist options
  • Save JnBrymn/9885157 to your computer and use it in GitHub Desktop.
Save JnBrymn/9885157 to your computer and use it in GitHub Desktop.
Simple Markov Model
from collections import defaultdict
import random
class MarkovModel(object):
    """
    Order-``n`` Markov model built from an iterator of tokens.

    The model maps a *key* (a tuple of up to ``n`` preceding tokens) to the
    tokens that were observed to follow it, together with their counts.
    ``None`` is a special token that delimits phrases: it resets the key to
    ``(None,)`` while training and ends the phrase while sampling.

    Attributes:
        n: the order of the model (maximum key length).
        model_dict: ``{key: {"count": total, "tokens_and_counts": {token: count}}}``.
            After construction its default factory returns ``None``, so
            indexing an unseen key yields ``None`` instead of a fresh entry.
    """

    @classmethod
    def _tokenizer(cls, text, token_delim):
        """
        Yield the tokens of ``text``.

        ``text`` is split into phrases on newlines and each phrase is split
        on ``token_delim``; a ``None`` is yielded after every phrase.
        """
        for phrase in text.split("\n"):
            for token in phrase.split(token_delim):
                yield token
            yield None

    @classmethod
    def fromText(cls, text, token_delim=".", n=1):
        """
        Build a model from a block of text (one phrase per line).

        Args:
            text: the training text.
            token_delim: separator between tokens inside a phrase.
            n: order of the model.

        Returns:
            A new instance of ``cls`` (so subclasses get their own type).
        """
        return cls(cls._tokenizer(text, token_delim), n)

    def __init__(self, token_iterator, n=1):
        """
        Train the model.

        Args:
            token_iterator: iterable of tokens; ``None`` marks a phrase end.
            n: order of the model.
        """
        self.n = n
        self.model_dict = defaultdict(
            lambda: {"count": 0, "tokens_and_counts": defaultdict(int)}
        )
        key = (None,)  # every phrase starts from this one-element tuple
        for token in token_iterator:
            sub_dict = self.model_dict[key]
            sub_dict["count"] += 1
            sub_dict["tokens_and_counts"][token] += 1
            key = self._shift_key(key, token)
        # Stop creating fresh entries: from now on an unseen key gives None.
        self.model_dict.default_factory = lambda: None

    def __repr__(self):
        """Return a tab-indented dump of every key and its follower counts."""
        lines = []
        for key, counts in self.model_dict.items():
            if counts is None:
                # Placeholder left behind by indexing an unseen key.
                continue
            lines.append("{0}\tcount:{1}\n".format(key, counts["count"]))
            for token, count in counts["tokens_and_counts"].items():
                lines.append("\t{0}\tcount:{1}\n".format(token, count))
        return "".join(lines)

    def generateSample(self, max_tokens=100):
        """
        Generate one random phrase from the model.

        Args:
            max_tokens: upper bound on the number of tokens returned.

        Returns:
            A list of tokens. Generation stops at a phrase delimiter
            (``None``), at a key with no recorded followers, or after
            ``max_tokens`` tokens.
        """
        key = (None,)
        tokens = []
        for _ in range(max_tokens):
            # .get() rather than indexing: indexing the defaultdict would
            # store a None entry for every dead-end key and mutate the model.
            sub_dict = self.model_dict.get(key)
            if sub_dict is None:
                return tokens  # dead end: nothing ever followed this key
            # Draw from 1..count so that each token is chosen with
            # probability count/total (randint includes both endpoints).
            until = random.randint(1, sub_dict["count"])
            for token, count in sub_dict["tokens_and_counts"].items():
                until -= count
                if until <= 0:
                    if token is None:
                        return tokens  # end of phrase
                    tokens.append(token)
                    key = self._shift_key(key, token)
                    break
        return tokens  # reached max_tokens

    def _shift_key(self, key, token):
        """
        Return the key that follows ``key`` once ``token`` has been seen.

        A ``None`` token resets the key to ``(None,)``; otherwise ``token``
        is appended and the oldest element is dropped when the key would
        exceed ``self.n`` elements.
        """
        if token is None:
            return (None,)
        shifted = list(key)
        shifted.append(token)
        if len(shifted) > self.n:
            del shifted[0]
        return tuple(shifted)
@JnBrymn
Copy link
Author

JnBrymn commented Apr 26, 2014

If you want to make fun of people's tweets, here's a good way to do it!

import tweepy
import os
# Consumer credentials and the bot's access token are read from the
# environment; a variable that is not set comes back as None.
auth = tweepy.OAuthHandler(
    os.environ.get("TWITTER_CONSUMER_KEY"),
    os.environ.get("TWITTER_CONSUMER_SECRET"),
)
auth.set_access_token(
    os.environ.get("TWITTER_BOT_TOKEN"),
    os.environ.get("TWITTER_BOT_SECRET"),
)
# Module-level API client used by make_fun_of below.
t = tweepy.API(auth)

def make_fun_of(screen_name, n=1):
    """
    Build a MarkovModel from a user's timeline.

    Requests the timeline of ``screen_name`` through the module-level ``t``
    client (with ``count=200``), splits each item's ``text`` on single
    spaces, and appends a ``None`` after every item so that each one is
    treated as a separate phrase.

    Args:
        screen_name: account whose timeline is requested.
        n: order of the resulting model.

    Returns:
        The trained MarkovModel.
    """
    statuses = t.user_timeline(screen_name=screen_name, count=200)
    tokens = [
        word
        for status in statuses
        for word in status.text.split(" ") + [None]
    ]
    return MarkovModel(tokens, n=n)

Use it this way:

# Train a model on one account's timeline, then print a single generated
# phrase with its tokens joined by spaces. The parenthesized print works on
# both Python 2 and Python 3.
bbombgardener = make_fun_of("bbombgardener")
print(" ".join(bbombgardener.generateSample()))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment