Created
July 9, 2015 18:18
-
-
Save paralysedforce/74804e86f10ca2453ba0 to your computer and use it in GitHub Desktop.
Markov Text Generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| Implements a simple Markov text generator | |
| """ | |
| from __future__ import print_function | |
| from collections import Counter | |
| from string import ascii_uppercase as AU | |
| from sys import argv | |
| import random | |
| import textwrap | |
| #TODO Refactor this to make it more Pythonic. | |
def get_frequencies(n, reference):
    """
    Count every contiguous slice of length n found in reference.

    Returns a Counter mapping each n-gram (a slice of reference) to the number
    of positions at which it occurs. The Counter is empty when reference is
    shorter than n.
    """
    counts = Counter()
    last_start = len(reference) - n
    # Slide a window of width n across the reference, one position at a time.
    for start in range(last_start + 1):
        counts[reference[start:start + n]] += 1
    return counts
def weighted_pick(elements, frequencies):
    """
    Pick one element at random, weighted by its frequency.

    elements    -- iterable of candidates.
    frequencies -- iterable of non-negative weights; the weight at position i
                   belongs to the element at position i.

    Returns an element chosen with probability proportional to its weight.
    An element whose weight is zero is never chosen unless every weight is
    zero, in which case the first element is returned.

    Raises IndexError if frequencies is empty.
    """
    # Materialise both inputs so that any iterable (dict views, generators)
    # is accepted, not just sliceable sequences.
    items = list(elements)
    weights = list(frequencies)
    if not weights:
        raise IndexError("weighted_pick() needs at least one frequency")

    # Build the running totals in a single pass instead of re-summing a
    # growing prefix for every position.
    cumulative = []
    running = 0
    for weight in weights:
        running += weight
        cumulative.append(running)

    # threshold lies in [0, total); element i owns the half-open interval
    # [cumulative[i-1], cumulative[i]), so zero-width intervals never match.
    threshold = random.random() * running
    for index, upper_bound in enumerate(cumulative):
        if threshold < upper_bound:
            return items[index]

    # Only reachable when the total weight is zero: nothing to weigh by.
    return items[0]
def get_next_letter(n, string, frequencies):
    """
    Choose the character that follows string according to n-gram counts.

    n           -- length of the n-grams stored in frequencies.
    string      -- text generated so far; its last n-1 characters form the
                   context the next character is conditioned on.
    frequencies -- mapping from n-gram to occurrence count.

    Returns the final character of an n-gram picked by weighted_pick from
    those n-grams whose first n-1 characters equal the context.

    Raises IndexError when no n-gram starts with the context.
    """
    context_length = n - 1
    # string[-0:] is the whole string rather than an empty one, so the
    # unigram case (n == 1) needs an explicit empty context.
    context = string[-context_length:] if context_length > 0 else ""

    candidates = [gram for gram in frequencies
                  if gram[:context_length] == context]
    if not candidates:
        raise IndexError(
            "no {0}-gram starts with {1!r}".format(n, context))

    weights = [frequencies[gram] for gram in candidates]
    return weighted_pick(candidates, weights)[-1]
def produce_sentence(n, reference):
    """
    Generate text from the n-gram statistics of reference.

    The text starts with an n-gram whose first character is an ASCII
    uppercase letter (chosen in proportion to how often it occurs) and is
    extended one character at a time until it contains a period.
    """
    table = get_frequencies(n, reference)

    # Seed candidates: every n-gram that opens with a capital letter.
    starters = [gram for gram in table.keys() if gram[0] in AU]
    starter_counts = [table[gram] for gram in starters]
    sentence = weighted_pick(starters, starter_counts)

    while True:
        if "." in sentence:
            return sentence
        sentence = sentence + get_next_letter(n, sentence, table)
def main():
    """
    Command-line entry point: print one generated sentence, line-wrapped.

    Usage: SCRIPT PATH [N]

    PATH -- text file used as the reference corpus.
    N    -- optional n-gram length (positive integer); defaults to 4.

    Exits with an explanatory message instead of a traceback when PATH is
    missing or N is not a positive integer.
    """
    try:
        path = argv[1]
    except IndexError:
        raise SystemExit("usage: {0} PATH [N]".format(argv[0]))

    try:
        n = int(argv[2])
    except IndexError:
        # No N given on the command line: fall back to 4-grams.
        n = 4
    except ValueError:
        raise SystemExit(
            "N must be an integer, got {0!r}".format(argv[2]))
    if n < 1:
        # An n-gram needs at least one character to be able to seed a sentence.
        raise SystemExit("N must be at least 1, got {0}".format(n))

    with open(path) as f:
        text = f.read()
    print(textwrap.fill(produce_sentence(n, text)))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment