Skip to content

Instantly share code, notes, and snippets.

@barrucadu
Created November 22, 2013 02:29
Show Gist options
  • Save barrucadu/7593799 to your computer and use it in GitHub Desktop.
Save barrucadu/7593799 to your computer and use it in GitHub Desktop.
"""Markov Chain.
Pass input to stdin.
Usage:
markov <len> [-n <n>] [-p <p>] [-c] [-s <s>]
markov -h | --help
Options:
<len> The length (in tokens) of the output to generate
-n <n> The number of symbols to look at in generation [default: 1]
-p <p> The probability to pick a random token [default: 0.05]
-c Split input per character, rather than per word
-s <s> Random seed [default: system time]
-h --help Show this screen
"""
from docopt import docopt
import sys
from time import time
from itertools import islice
from markov import Markov
if __name__ == "__main__":
    # Command-line entry point: read training text from stdin, train a
    # Markov chain on it, and print the seed followed by <len> generated
    # tokens.
    arguments = docopt(__doc__)

    n = int(arguments["-n"])
    p = float(arguments["-p"])
    length = int(arguments["<len>"])

    # The documented default for -s is the literal text "system time", which
    # is not an integer, so a failed conversion means "use the clock".
    # Only conversion failures are caught; anything else (Ctrl-C, etc.)
    # propagates normally.
    try:
        s = int(arguments["-s"])
    except (TypeError, ValueError):
        s = int(time())

    # `not 0 <= p <= 1` also rejects NaN, which the pair of comparisons
    # `p > 1 or p < 0` would let through.
    if not 0 <= p <= 1:
        sys.exit("p must be in the range 0 to 1 (inclusive)")
    # n == 0 is accepted: the chain then ignores context entirely.
    if n < 0:
        sys.exit("n must not be negative")
    # islice() rejects negative counts with an obscure error; check up front.
    if length < 0:
        sys.exit("<len> must not be negative")

    training_data = sys.stdin.read()
    if not arguments["-c"]:
        # Word mode: tokens are whitespace-separated words.  In character
        # mode the string itself is iterated one character at a time.
        training_data = training_data.split()

    # Nothing can be generated from an empty corpus.
    if length > 0 and not training_data:
        sys.exit("no training data was supplied on stdin")

    m = Markov(n=n, p=p, seed=s)
    m.train(training_data)

    # Echo the seed so that a run can be reproduced with -s.
    print("Seed: ", s)

    separator = "" if arguments["-c"] else " "
    print(separator.join(islice(m, length)))
import random
class Markov:
    """Order-``n`` Markov chain over arbitrary hashable tokens.

    The object is its own iterator: ``iter(m)`` restarts generation from the
    stored seed, and each ``next(m)`` yields one generated token.

    Attributes:
        n: Maximum number of preceding tokens used as context.
        p: Probability, checked on every step that has a non-empty context,
            of drawing the next token from the whole corpus instead of from
            the followers of the current context.
        seed: Seed used to (re)initialise the random generator in
            ``__iter__``.
        data: Dict mapping a context tuple (length 0 to ``n``) to the list
            of tokens observed right after it.  Repeats are kept, so a
            uniform choice from the list is frequency-weighted.
        prev: The current context tuple during generation.
    """

    def __init__(self, n, p, seed):
        """Create an untrained chain with context size n, jump chance p."""
        self.n = n
        self.p = p
        self.seed = seed
        self.data = {}
        # Initialised here as well as in __iter__ so that next() works even
        # when iter() was never called on the object.
        self.prev = ()
        # A private generator keeps generation reproducible without
        # reseeding the process-wide ``random`` module state.
        self._rng = random.Random(seed)

    def train(self, training_data):
        """Record, for every token, the contexts it was seen to follow.

        Args:
            training_data: Iterable of hashable tokens (e.g. a list of
                words, or a string to train per character).  May be called
                more than once; each call starts from an empty context.
        """
        context = ()
        for token in training_data:
            # Register the token under every suffix of the context,
            # including the empty one, so data[()] holds the whole corpus.
            for start in range(len(context) + 1):
                self.data.setdefault(context[start:], []).append(token)
            context += (token,)
            if len(context) > self.n:
                context = context[1:]

    def __iter__(self):
        """Restart generation from the stored seed and return self."""
        self._rng = random.Random(self.seed)
        self.prev = ()
        return self

    def __next__(self):
        """Generate and return the next token.

        Raises:
            StopIteration: If the chain has not been trained on any tokens.
        """
        corpus = self.data.get(())
        if not corpus:
            # Untrained chain: there is nothing to choose from.
            raise StopIteration

        rng = self._rng
        # Short-circuit order matters: no random number is drawn while the
        # context is empty, which keeps seeded output stable.
        if self.prev == () or rng.random() < self.p:
            token = rng.choice(corpus)
        else:
            try:
                followers = self.data[self.prev]
            except KeyError:
                # This context never occurred in training; drop it and
                # fall back to choosing from the whole corpus.
                self.prev = ()
                followers = corpus
            token = rng.choice(followers)

        self.prev += (token,)
        if len(self.prev) > self.n:
            self.prev = self.prev[1:]
        return token
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment