Last active
May 29, 2019 06:01
-
-
Save rj00a/025563886049f13f9d7f7ce782500e29 to your computer and use it in GitHub Desktop.
Simple Markov chain generator in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class MarkovChar:
    """Markov chain text generator that works on fixed-size character chunks.

    Training text is cut into consecutive, non-overlapping chunks of
    ``seq_size`` characters. For every chunk the model counts which chunk
    followed it; ``None`` is recorded as the follower of the last complete
    chunk of each training string and acts as the end-of-text marker.
    """

    def __init__(self, seq_size):
        """Create an empty model.

        seq_size: number of characters per chunk; must be at least 1.

        Raises ValueError if seq_size is smaller than 1 (such a size would
        make train() loop forever).
        """
        if seq_size < 1:
            raise ValueError('seq_size must be at least 1')
        # key type: character sequence
        # value type: map of following character sequence (or None for
        #             end-of-text) to int (occurrence count)
        self.prob_map = {}
        self.seq_size = seq_size

    def train(self, raw):
        """Add the chunk transitions found in the string ``raw`` to the model.

        Trailing characters that do not fill a whole chunk are ignored,
        except when ``raw`` is shorter than one chunk, in which case the
        whole string is stored as a chunk that is only followed by the end
        marker. An empty string carries no information and is skipped.
        """
        size = self.seq_size
        # range() yields nothing for an empty string, so no empty-string
        # state is ever registered.
        for idx in range(0, len(raw), size):
            seq = raw[idx:idx + size]
            next_end = idx + 2 * size
            # A follower only counts if it is a complete chunk; otherwise
            # this chunk is the last one and is followed by the end marker.
            next_seq = raw[idx + size:next_end] if next_end <= len(raw) else None
            followers = self.prob_map.setdefault(seq, {})
            followers[next_seq] = followers.get(next_seq, 0) + 1
            if next_seq is None:
                break

    @staticmethod
    def _weighted_choice(counts):
        """Return one key of ``counts``, picked proportionally to its count."""
        total = sum(counts.values())
        # randrange(total) is uniform over 0..total-1, so each key owns
        # exactly `count` of the possible outcomes.
        pick = random.randrange(total)
        running = 0
        for key, count in counts.items():
            running += count
            if pick < running:
                return key

    def generate(self, char_limit=0):
        """Generate text by walking the chain from a random starting chunk.

        char_limit: maximum number of characters in the returned string.
            If it is zero or less, no limit is applied and generation only
            stops when the end marker is drawn (which may never happen if
            the trained data contains a cycle without an end marker).

        Returns the generated string; an untrained model yields ''.
        """
        if not self.prob_map:
            return ''
        limited = char_limit > 0
        # start with a random initial chunk.
        state = random.choice(list(self.prob_map))
        pieces = [state]
        length = len(state)
        while not limited or length < char_limit:
            followers = self.prob_map.get(state)
            if not followers:
                # Unknown chunk (only possible if prob_map was edited by
                # hand): nothing can follow it.
                break
            state = self._weighted_choice(followers)
            if state is None:
                break
            pieces.append(state)
            length += len(state)
        result = ''.join(pieces)
        # The last appended chunk may run past the limit; cut it off.
        return result[:char_limit] if limited else result
def _prompt_seq_size():
    """Ask on stdin until the user enters an integer sequence size >= 1."""
    while True:
        try:
            value = int(input('Enter a sequence size: '))
        except ValueError:
            print("Invalid input.")
            continue
        if value < 1:
            print("Sequence size must be greater than zero.")
            continue
        return value


def _prompt_char_limit():
    """Ask on stdin for a non-negative character limit; empty input means 0 (no limit)."""
    while True:
        answer = input('Character limit? (enter nothing to skip): ').strip()
        if not answer:
            return 0
        try:
            value = int(answer)
        except ValueError:
            # Only an empty answer skips the limit; anything else that is
            # not a number is a typo and gets another try.
            print("Invalid input.")
            continue
        if value < 0:
            print('Character limit cannot be negative.')
            continue
        return value


# Read the training text up front so the input file is closed right away.
try:
    with open('input.txt', 'r', encoding='utf8') as in_file:
        lines = in_file.readlines()
except FileNotFoundError:
    print('input.txt does not exist.')
    # quit() is a convenience of the site module and is not always
    # available; raising SystemExit works everywhere.
    raise SystemExit

seq_size = _prompt_seq_size()
char_limit = _prompt_char_limit()

# Train the model one line at a time.
mc = MarkovChar(seq_size)
for line in lines:
    mc.train(line)

# Nothing was learned (empty input file): there is nothing to generate.
if not mc.prob_map:
    print('input.txt is empty.')
    raise SystemExit

result = mc.generate(char_limit)
print()
print(result)
print()

# Open the output file only once there is a result, so an aborted run does
# not wipe a previous output.txt.
with open('output.txt', 'w', encoding='utf8') as out_file:
    out_file.write(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment