JnBrymn · August 29, 2015 13:57 · JnBrymn · Mar 31, 2014 · JnBrymn · Apr 26, 2014
diff --git a/MarkovModel.py b/MarkovModel.py
 from collections import defaultdict
 import random
        
 class MarkovModel(object):
     """
     Takes an iterator of tokens and makes a Markov model of the tokens.

     ``n`` is the "order" of the model: the maximum number of preceding
     tokens kept as the context (key) for the next token.  ``None`` is a
     special token that serves as a delimiter of phrases: seeing it resets
     the context to ``(None,)`` and drawing it while sampling ends the sample.

     ``model_dict`` maps each context tuple to a record of the form
     ``{"count": total, "tokens_and_counts": {token: count}}``.
     """

     @classmethod
     def _tokenizer(cls, text, token_delim):
         """
         Yield the tokens of ``text``, treating every line as one phrase.

         Each line is split on ``token_delim``; a ``None`` is yielded after
         the tokens of every line to mark the end of that phrase.
         """
         for phrase in text.split("\n"):
             for token in phrase.split(token_delim):
                 yield token
             yield None

     @classmethod
     def fromText(cls, text, token_delim=".", n=1):
         """
         Alternate constructor: build a model of order ``n`` from a string.

         ``cls`` is used (rather than a hard-coded class name) so that a
         subclass gets an instance of its own type.
         """
         return cls(cls._tokenizer(text, token_delim), n)

     def __init__(self, token_iterator, n=1):
         """
         Count, for every context, which tokens follow it.

         token_iterator -- iterable of tokens; ``None`` marks a phrase end.
         n              -- order of the model (context length).
         """
         self.n = n
         self.model_dict = defaultdict(
             lambda: {"count": 0, "tokens_and_counts": defaultdict(int)}
         )
         key = (None,)  # this is a tuple; every phrase starts from this context
         for token in token_iterator:
             sub_dict = self.model_dict[key]
             sub_dict["count"] += 1
             sub_dict["tokens_and_counts"][token] += 1
             key = self._shift_key(key, token)
         # Training is done: from now on indexing an unseen context yields None
         # instead of creating a fresh counter record.  Note that indexing with
         # [] still stores that None under the key, so the methods of this
         # class use .get() to leave the model untouched.
         self.model_dict.default_factory = lambda: None

     def __repr__(self):
         """Return one line per context followed by one line per next token."""
         parts = []
         for key, counts in self.model_dict.items():
             if counts is None:
                 # Placeholder left behind by an external lookup of an unseen
                 # context; it carries no statistics to print.
                 continue
             parts.append("{0}\tcount:{1}\n".format(key, counts["count"]))
             for token, count in counts["tokens_and_counts"].items():
                 parts.append("\t{0}\tcount:{1}\n".format(token, count))
         return "".join(parts)

     def generateSample(self, max_tokens=100):
         """
         Generate a random phrase as a list of at most ``max_tokens`` tokens.

         Starting from the ``(None,)`` context, the next token is drawn with
         probability proportional to how often it followed the current
         context.  Sampling stops when ``None`` is drawn (end of phrase), when
         the current context was never seen in training (dead end), or when
         ``max_tokens`` tokens have been produced.
         """
         key = (None,)
         tokens = []
         for _ in range(max_tokens):
             # .get() so that a dead-end context is not inserted into the model
             sub_dict = self.model_dict.get(key)
             if sub_dict is None:
                 return tokens  # here we have reached a dead end
             # randint is inclusive on both ends, so draw from 1..count: each
             # token then owns exactly `count` of the possible draws.
             until = random.randint(1, sub_dict["count"])
             for token, count in sub_dict["tokens_and_counts"].items():
                 until -= count
                 if until <= 0:
                     if token is None:
                         return tokens  # here we have reached the end of a phrase
                     tokens.append(token)
                     key = self._shift_key(key, token)
                     break
         return tokens  # here we have reached the max_tokens

     def _shift_key(self, key, token):
         """
         Return the context that follows ``key`` once ``token`` has been seen.

         ``None`` resets the context to ``(None,)``; any other token is
         appended and the oldest entry is dropped when the context would
         exceed ``self.n`` tokens.
         """
         if token is None:
             return (token,)
         shifted = tuple(key) + (token,)
         if len(shifted) > self.n:
             shifted = shifted[1:]
         return shifted
	from collections import defaultdict
	import random

	class MarkovModel(object):
	    """
	    Takes an iterator of tokens and makes a Markov model of the tokens.

	    ``n`` is the "order" of the model: the maximum number of preceding
	    tokens kept as the context (key) for the next token.  ``None`` is a
	    special token that serves as a delimiter of phrases: seeing it resets
	    the context to ``(None,)`` and drawing it while sampling ends the sample.

	    ``model_dict`` maps each context tuple to a record of the form
	    ``{"count": total, "tokens_and_counts": {token: count}}``.
	    """

	    @classmethod
	    def _tokenizer(cls, text, token_delim):
	        """
	        Yield the tokens of ``text``, treating every line as one phrase.

	        Each line is split on ``token_delim``; a ``None`` is yielded after
	        the tokens of every line to mark the end of that phrase.
	        """
	        for phrase in text.split("\n"):
	            for token in phrase.split(token_delim):
	                yield token
	            yield None

	    @classmethod
	    def fromText(cls, text, token_delim=".", n=1):
	        """
	        Alternate constructor: build a model of order ``n`` from a string.

	        ``cls`` is used (rather than a hard-coded class name) so that a
	        subclass gets an instance of its own type.
	        """
	        return cls(cls._tokenizer(text, token_delim), n)

	    def __init__(self, token_iterator, n=1):
	        """
	        Count, for every context, which tokens follow it.

	        token_iterator -- iterable of tokens; ``None`` marks a phrase end.
	        n              -- order of the model (context length).
	        """
	        self.n = n
	        self.model_dict = defaultdict(
	            lambda: {"count": 0, "tokens_and_counts": defaultdict(int)}
	        )
	        key = (None,)  # this is a tuple; every phrase starts from this context
	        for token in token_iterator:
	            sub_dict = self.model_dict[key]
	            sub_dict["count"] += 1
	            sub_dict["tokens_and_counts"][token] += 1
	            key = self._shift_key(key, token)
	        # Training is done: from now on indexing an unseen context yields None
	        # instead of creating a fresh counter record.  Note that indexing with
	        # [] still stores that None under the key, so the methods of this
	        # class use .get() to leave the model untouched.
	        self.model_dict.default_factory = lambda: None

	    def __repr__(self):
	        """Return one line per context followed by one line per next token."""
	        parts = []
	        for key, counts in self.model_dict.items():
	            if counts is None:
	                # Placeholder left behind by an external lookup of an unseen
	                # context; it carries no statistics to print.
	                continue
	            parts.append("{0}\tcount:{1}\n".format(key, counts["count"]))
	            for token, count in counts["tokens_and_counts"].items():
	                parts.append("\t{0}\tcount:{1}\n".format(token, count))
	        return "".join(parts)

	    def generateSample(self, max_tokens=100):
	        """
	        Generate a random phrase as a list of at most ``max_tokens`` tokens.

	        Starting from the ``(None,)`` context, the next token is drawn with
	        probability proportional to how often it followed the current
	        context.  Sampling stops when ``None`` is drawn (end of phrase), when
	        the current context was never seen in training (dead end), or when
	        ``max_tokens`` tokens have been produced.
	        """
	        key = (None,)
	        tokens = []
	        for _ in range(max_tokens):
	            # .get() so that a dead-end context is not inserted into the model
	            sub_dict = self.model_dict.get(key)
	            if sub_dict is None:
	                return tokens  # here we have reached a dead end
	            # randint is inclusive on both ends, so draw from 1..count: each
	            # token then owns exactly `count` of the possible draws.
	            until = random.randint(1, sub_dict["count"])
	            for token, count in sub_dict["tokens_and_counts"].items():
	                until -= count
	                if until <= 0:
	                    if token is None:
	                        return tokens  # here we have reached the end of a phrase
	                    tokens.append(token)
	                    key = self._shift_key(key, token)
	                    break
	        return tokens  # here we have reached the max_tokens

	    def _shift_key(self, key, token):
	        """
	        Return the context that follows ``key`` once ``token`` has been seen.

	        ``None`` resets the context to ``(None,)``; any other token is
	        appended and the oldest entry is dropped when the context would
	        exceed ``self.n`` tokens.
	        """
	        if token is None:
	            return (token,)
	        shifted = tuple(key) + (token,)
	        if len(shifted) > self.n:
	            shifted = shifted[1:]
	        return shifted