Skip to content

Instantly share code, notes, and snippets.

@paralysedforce
Created July 9, 2015 18:18
Show Gist options
  • Select an option

  • Save paralysedforce/74804e86f10ca2453ba0 to your computer and use it in GitHub Desktop.

Select an option

Save paralysedforce/74804e86f10ca2453ba0 to your computer and use it in GitHub Desktop.
Markov Text Generator
#!/usr/bin/env python
"""
Implements a simple Markov text generator
"""
from __future__ import print_function
from collections import Counter
from string import ascii_uppercase as AU
from sys import argv
import random
import textwrap
#TODO Refactor this to make it more Pythonic.
def get_frequencies(n, reference):
    """
    Counts every length-n slice (n-gram) of reference.

    Slices are taken at each start position from 0 up to len(reference) - n,
    so overlapping n-grams are all counted. Returns a Counter mapping each
    n-gram to the number of times it occurs; the Counter is empty when
    reference is shorter than n.
    """
    counts = Counter()
    last_start = len(reference) - n
    for start in range(last_start + 1):
        counts[reference[start:start + n]] += 1
    return counts
def weighted_pick(elements, frequencies):
    """
    Picks one item of elements at random, weighted by frequencies.

    elements and frequencies are parallel sequences: frequencies[i] is the
    weight of elements[i]. With non-negative weights, an element is chosen
    with probability proportional to its weight.

    Raises IndexError if frequencies is empty, since there is nothing to
    pick from.
    """
    if not frequencies:
        raise IndexError("cannot pick from an empty sequence")
    # random.random() is in [0, 1), so the threshold never exceeds the total.
    threshold = random.random() * sum(frequencies)
    # Walk the cumulative weights in a single pass and return the first
    # element whose cumulative weight reaches the threshold.
    running = 0
    for index, weight in enumerate(frequencies):
        running += weight
        if threshold <= running:
            return elements[index]
    # Only reachable when the weights are not all non-negative numbers.
    return None
def get_next_letter(n, string, frequencies):
    """
    Gets the next letter of the string based on an n-gram using dict
    frequencies.

    frequencies maps n-grams to their counts. Every n-gram whose first n-1
    characters equal the last n-1 characters of string is a candidate; one is
    drawn with weighted_pick, weighted by its count, and its final character
    is returned.

    Raises IndexError if no n-gram continues string (a dead end).
    """
    # For n == 1 the context must be empty. The slice string[-n+1:] would be
    # string[0:], i.e. the whole string, so that case is handled explicitly.
    context = string[-(n - 1):] if n > 1 else string[:0]
    candidates = [gram for gram in frequencies if gram[:n - 1] == context]
    if not candidates:
        raise IndexError(
            "no %d-gram in the reference continues %r" % (n, context))
    weights = [frequencies[gram] for gram in candidates]
    return weighted_pick(candidates, weights)[-1]
def produce_sentence(n, reference):
    """
    Produces a sentence using n-grams with given reference.

    The sentence starts with an n-gram of reference whose first character is
    an uppercase ASCII letter, drawn in proportion to how often it occurs.
    It is then extended one character at a time with get_next_letter until
    it contains a ".", and the text up to and including that "." is returned.

    Raises IndexError if reference has no n-gram starting with an uppercase
    ASCII letter. Any exception raised by get_next_letter (for example when
    the text generated so far has no continuation) propagates to the caller.
    """
    frequencies = get_frequencies(n, reference)

    # Candidate openings: n-grams that begin with a capital letter.
    starters = [gram for gram in frequencies if gram and gram[0] in AU]
    if not starters:
        raise IndexError(
            "reference has no %d-gram starting with an uppercase letter" % n)
    sentence = weighted_pick(starters, [frequencies[g] for g in starters])

    # The opening n-gram may already contain the end of the sentence.
    if "." in sentence:
        return sentence
    # After that, only the newly added character can be the terminating ".",
    # so there is no need to rescan the whole sentence on every step.
    while True:
        letter = get_next_letter(n, sentence, frequencies)
        sentence += letter
        if letter == ".":
            return sentence
def main():
    """
    Command-line entry point: prints one generated sentence.

    Usage: <script> PATH [N]. PATH is the reference text file and N is the
    n-gram length, defaulting to 4 when omitted. Exits with a short message
    instead of a traceback when PATH is missing or N is not an integer.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "markov"
        raise SystemExit("usage: %s PATH [N]" % prog)
    path = argv[1]
    try:
        n = int(argv[2])
    except IndexError:
        # No N given on the command line: fall back to 4-grams.
        n = 4
    except ValueError:
        raise SystemExit("N must be an integer, got %r" % argv[2])
    with open(path) as f:
        text = f.read()
    # Wrap the sentence to textwrap's default width before printing.
    print(textwrap.fill(produce_sentence(n, text)))


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment