-
-
Save fouric/8575e23a4509842baba707f79f68b936 to your computer and use it in GitHub Desktop.
A simple Markov chain to generate nicknames or fake English words.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple Markov chain to generate nicknames or fake English words.
#
# Letter-to-letter transition counts are learned from a word list, then
# each generated name starts with a random letter and is extended one
# letter at a time, weighted by how often each letter follows the
# previous one in the word list.
import collections
import random
import re
import string

alphabet = string.ascii_lowercase

WORDS_PATH = '/usr/share/dict/words'
NAME_COUNT = 20
NAME_LENGTH = 6


def build_counters(raw_words):
    """Count, for each lowercase letter, which letters follow it.

    Each word is lowercased and stripped of every character outside
    a-z before its adjacent letter pairs are counted.

    Args:
        raw_words: iterable of strings (one word each).

    Returns:
        A dict mapping every letter of ``alphabet`` to a
        ``collections.Counter`` of the letters that followed it.
    """
    counters = {letter: collections.Counter() for letter in alphabet}
    for raw_word in raw_words:
        word = re.sub(r'[^a-z]', r'', raw_word.lower())
        # Pair every letter with its successor; empty and one-letter
        # words yield no pairs.
        for current, following in zip(word, word[1:]):
            counters[current][following] += 1
    return counters


def build_pools(counters):
    """Expand each counter into a list with one entry per observation.

    Picking uniformly from such a list is a pick weighted by count.
    The lists are built once here rather than on every generation step.

    Args:
        counters: dict of letter -> ``collections.Counter``.

    Returns:
        A dict of letter -> list of successor letters (possibly empty).
    """
    return {
        letter: list(counter.elements())
        for letter, counter in counters.items()
    }


def generate_name(pools, length=NAME_LENGTH):
    """Return a random name grown letter by letter from ``pools``.

    The name starts with a uniformly random letter, so it always has at
    least one character even when ``length`` is smaller than 1.

    Args:
        pools: dict of letter -> sequence of candidate successor letters,
            as produced by ``build_pools``.
        length: number of characters to grow the name to.

    Returns:
        The generated name as a string.
    """
    name = random.choice(alphabet)
    while len(name) < length:
        # A letter with no recorded successor would make random.choice
        # raise IndexError; fall back to a uniform pick instead.
        pool = pools.get(name[-1]) or alphabet
        name += random.choice(pool)
    return name


def main():
    """Read the system word list and print ``NAME_COUNT`` names."""
    with open(WORDS_PATH, 'r') as words_file:
        raw_words = words_file.read().splitlines()
    pools = build_pools(build_counters(raw_words))
    for _ in range(NAME_COUNT):
        # Parenthesised single-argument print works on Python 2 and 3.
        print(generate_name(pools))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment