Skip to content

Instantly share code, notes, and snippets.

@lynn
Last active January 7, 2018 21:09
Show Gist options
  • Save lynn/07799f035b7952e6a1e2f6a384f25011 to your computer and use it in GitHub Desktop.
Save lynn/07799f035b7952e6a1e2f6a384f25011 to your computer and use it in GitHub Desktop.
Turn English text into nonsense that sounds like the input
from collections import defaultdict
import fileinput
import random
import re
# Very frequent English words. find_rhyme() returns these unchanged so the
# output keeps the grammatical skeleton of the input.
common = str.split(
    """the of and to a in for is on that by this with
i you it not or be are from at as your all have an was we
will can us i'm it you're i've my of"""
)
# Maps each dictionary headword (as written in the file, e.g. 'shining' or
# an alternate such as 'a(2)') to its list of phoneme tokens.
pronounce = {}
# Load pronunciations from the CMU pronunciation dictionary.
# Data: https://raw.githubusercontent.com/cmusphinx/cmudict/master/cmudict.dict
# The encoding is spelled out so parsing does not depend on the platform's
# default locale (assumes the downloaded file is UTF-8 -- confirm if you use
# a different copy of the dictionary).
with open('cmudict.dict', encoding='utf-8') as f:
    for line in f:
        # Entries may carry a trailing " # ..." annotation; drop it, then
        # split on runs of whitespace so repeated spaces never produce
        # empty phoneme tokens.
        fields = line.split(' #', 1)[0].split()
        if not fields:
            # Blank line: nothing to record (and no bogus '' headword).
            continue
        word, *phonemes = fields
        pronounce[word] = phonemes
def vowel_key(phonemes):
    '''
    Return the 'vowel key' for a list of phonemes.

    The key is a tuple of the phonemes whose first character is one of
    A, E, I, O, U, in their original order; every other phoneme is dropped.
    Empty tokens are ignored rather than being mistaken for vowels.

    Example: for both the pronunciations of 'shining' and 'typist'
    this is ('AY1', 'IH0'), so these words are said to 'sound the same.'
    '''
    # Guard on truthiness first: '' is a substring of every string, so a
    # bare "x[:1] in 'AEIOU'" would wrongly accept empty tokens.
    return tuple(p for p in phonemes if p and p[0] in 'AEIOU')
# Build the rhyming dictionary: group every headword under its vowel key.
# e.g. rhymes[('AY1', 'IH0')] holds 'shining', 'typist', 'whitish'...
rhymes = defaultdict(list)
for headword in pronounce:
    bucket = rhymes[vowel_key(pronounce[headword])]
    bucket.append(headword)
def find_rhyme(word):
    '''
    Return a random word that sounds like the given word.

    The word is looked up case-insensitively. It is returned unchanged when
    it is listed in `common`, is three characters or shorter, or has no
    entry in `pronounce`. Otherwise a random entry with the same vowel key
    is chosen (this may be the word itself).

    Capitalization follows the input: an ALL-CAPS word yields an all-caps
    rhyme, a word with a leading capital yields a capitalized rhyme, and
    anything else yields the rhyme as listed in the dictionary.
    '''
    lword = word.lower()
    # Don't change really common/short words, or ones we don't know.
    if len(lword) <= 3 or lword in common or lword not in pronounce:
        return word
    # Pick a random rhyme: a word with the same vowel key as this one.
    candidates = rhymes[vowel_key(pronounce[lword])]
    rhyme = random.choice(candidates)
    # Remove parenthesized numbers from the end of the rhyme.
    # (This is just how cmudict lists alternate pronunciations.)
    rhyme = re.sub(r'\(\d+\)$', '', rhyme)
    # Restore capitalization from the original. All-caps is checked first
    # so that shouted input ("STOP") stays shouted instead of being
    # reduced to a single leading capital.
    if word.isupper():
        return rhyme.upper()
    if word[:1].isupper():
        return rhyme[:1].upper() + rhyme[1:]
    return rhyme
def rhyme_each_word(line):
    '''
    Return `line`, stripped of surrounding whitespace, with every run of
    word characters and apostrophes passed through find_rhyme().
    Everything between those runs (spaces, punctuation) is kept as is.
    '''
    def swap(match):
        return find_rhyme(match.group(0))

    text = line.strip()
    return re.compile(r"[\w']+").sub(swap, text)
if __name__ == '__main__':
    # fileinput reads the files named on the command line, or standard
    # input when none are given; each line is rhymed and printed in turn.
    for text in fileinput.input():
        rhymed = rhyme_each_word(text)
        print(rhymed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment