Skip to content

Instantly share code, notes, and snippets.

@rj00a
Last active May 29, 2019 06:01
Show Gist options
  • Save rj00a/025563886049f13f9d7f7ce782500e29 to your computer and use it in GitHub Desktop.
Save rj00a/025563886049f13f9d7f7ce782500e29 to your computer and use it in GitHub Desktop.
Simple Markov chain generator in Python
import random
# Generates markov chains using sequences of characters
class MarkovChar:
    """Markov chain text generator built on fixed-size character sequences.

    Training text is cut into consecutive, non-overlapping chunks of
    ``seq_size`` characters. For every chunk the model counts which chunk
    followed it, and ``generate`` walks those counts at random.
    """

    def __init__(self, seq_size):
        """Create an untrained generator.

        seq_size: number of characters in each sequence; must be >= 1.

        Raises ValueError if seq_size is smaller than 1, because training
        would otherwise never advance through the input.
        """
        if seq_size < 1:
            raise ValueError("seq_size must be greater than zero")
        # key type: character sequence
        # value type: dict mapping the following character sequence to an
        # int (occurrence count). The key None counts how many times the
        # sequence was the last one of a training string.
        self.prob_map = {}
        self.seq_size = seq_size

    def train(self, raw):
        """Add the transitions found in the string ``raw`` to the model.

        The string is read in steps of ``seq_size`` characters. Trailing
        characters that do not fill a complete following sequence are not
        recorded as a follower; the last recorded sequence is marked as an
        end of chain instead.
        """
        size = self.seq_size
        idx = 0
        while True:
            seq = raw[idx:idx + size]
            next_seq_end_idx = idx + 2 * size
            # None stands for "the string ended after this sequence".
            if next_seq_end_idx <= len(raw):
                next_seq = raw[idx + size:next_seq_end_idx]
            else:
                next_seq = None
            followers = self.prob_map.setdefault(seq, {})
            followers[next_seq] = followers.get(next_seq, 0) + 1
            if next_seq is None:
                break
            idx += size

    @staticmethod
    def _pick_next(followers):
        """Return a random key of ``followers``, weighted by its count."""
        # Draw from 1..total so that every key owns exactly `count` of the
        # possible draws. Starting at 0 would give the first key one extra
        # chance and skew the distribution.
        target = random.randint(1, sum(followers.values()))
        running = 0
        for key, count in followers.items():
            running += count
            if running >= target:
                return key

    def generate(self, char_limit=0):
        """Generate text by walking the trained transitions at random.

        char_limit: soft limit on the output length. Generation stops as
            soon as the text is longer than char_limit, so the result can
            exceed the limit by up to ``seq_size`` characters. Zero or
            less means no limit.

        Returns the generated string. The walk also ends when an
        end-of-chain marker is drawn.

        Raises IndexError if the model has not been trained yet, since
        there is no sequence to start from.
        """
        # Start with a random known sequence.
        pieces = [random.choice(list(self.prob_map))]
        length = len(pieces[0])
        while True:
            # The last appended sequence decides what may come next.
            next_seq = self._pick_next(self.prob_map[pieces[-1]])
            if next_seq is None or (char_limit > 0 and length > char_limit):
                return ''.join(pieces)
            pieces.append(next_seq)
            length += len(next_seq)
# Command-line driver: trains a MarkovChar on every line of input.txt,
# prints one generated text and also saves it to output.txt.
try:
    in_file = open('input.txt', 'r', encoding='utf8')
except FileNotFoundError:
    print('input.txt does not exist.')
    # quit() is a helper for the interactive shell; raising SystemExit
    # ends the script with the same exit status.
    raise SystemExit

# The with block closes the input file even if a prompt is interrupted.
with in_file:
    # Ask until a positive integer is entered.
    while True:
        try:
            seq_size = int(input('Enter a sequence size: '))
        except ValueError:
            print("Invalid input.")
            continue
        if seq_size < 1:
            print("Sequence size must be greater than zero.")
            continue
        break

    # Ask for an optional limit; anything that is not an integer
    # (including empty input) means "no limit".
    while True:
        try:
            char_limit = int(input('Character limit? (enter nothing to skip): '))
        except ValueError:
            char_limit = 0
            break
        if char_limit < 0:
            print('Character limit cannot be negative.')
            continue
        break

    mc = MarkovChar(seq_size)
    for line in in_file:
        mc.train(line)

# Without any training data there is no sequence to start generating from.
if not mc.prob_map:
    print('input.txt is empty.')
    raise SystemExit

result = mc.generate(char_limit)
print()
print(result)
print()

# Open the output only once there is something to write, so an aborted
# run does not leave a truncated output.txt behind.
with open('output.txt', 'w', encoding='utf8') as out_file:
    out_file.write(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment