Last active
January 7, 2018 21:09
-
-
Save lynn/07799f035b7952e6a1e2f6a384f25011 to your computer and use it in GitHub Desktop.
Turn English text into nonsense that sounds like the input
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict
import fileinput
import random
import re
# Very common words that find_rhyme() leaves untouched.
# Stored as a frozenset: the only use is a membership test performed once
# per input word, and the literal below repeats a few entries ('it', 'of').
common = frozenset("""the of and to a in for is on that by this with
i you it not or be are from at as your all have an was we
will can us i'm it you're i've my of""".split())
# Maps each dictionary entry (e.g. 'shining', or 'typist(2)' for an alternate
# pronunciation) to its list of phoneme symbols such as 'AY1'.
pronounce = {}

# Load pronunciations from the CMU pronunciation dictionary.
# Data: https://raw.githubusercontent.com/cmusphinx/cmudict/master/cmudict.dict
with open('cmudict.dict', encoding='utf-8') as f:
    for line in f:
        # Anything after a '#' is treated as a comment, not as phonemes.
        # split() with no argument collapses runs of whitespace, so repeated
        # spaces never produce empty phoneme strings.
        fields = line.split('#', 1)[0].split()
        if not fields:
            # Blank or comment-only line: nothing to record.
            continue
        word, *phonemes = fields
        pronounce[word] = phonemes
def vowel_key(phonemes):
    '''
    Return the 'vowel key' for a list of phonemes.

    The key is a tuple of the phonemes whose symbol starts with one of the
    letters A, E, I, O or U, in their original order.

    Example: for both the pronunciations of 'shining' and 'typist'
    this is ('AY1', 'IH0'), so these words are said to 'sound the same.'

    Empty strings are ignored; an input with no vowel phonemes yields ().
    '''
    # Check the first character explicitly: `'' in 'AEIOU'` is True, so a
    # slice-based test would wrongly count an empty phoneme as a vowel.
    return tuple(p for p in phonemes if p and p[0] in 'AEIOU')
# Create a literal rhyming dictionary!
# rhymes[('AY1', 'IH0')] contains 'shining', 'typist', 'whitish'...
# Every entry of `pronounce` is filed under its vowel key, so a word is
# always among its own rhyme candidates.
rhymes = defaultdict(list)
for entry, entry_phonemes in pronounce.items():
    rhymes[vowel_key(entry_phonemes)].append(entry)
def find_rhyme(word):
    '''
    Return a random word that sounds like the given word.

    The word is returned unchanged when its lowercase form is in `common`,
    is three characters or shorter, or has no entry in `pronounce`.
    Otherwise a random entry sharing the same vowel key is chosen; since
    the word itself is one of the candidates, it may be returned as-is.

    Capitalization follows the input: an all-uppercase word yields an
    all-uppercase rhyme, a word with a leading capital yields a
    capitalized rhyme, anything else is left as stored in the dictionary.
    '''
    # Don't change really common/short words, or ones we don't know.
    lword = word.lower()
    if lword in common or len(lword) <= 3 or lword not in pronounce:
        return word

    # Pick a random rhyme: a word with the same vowel key as this one.
    rhyme = random.choice(rhymes[vowel_key(pronounce[lword])])

    # Remove parenthesized numbers from the end of the rhyme.
    # (This is just how cmudict lists alternate pronunciations.)
    rhyme = re.sub(r'\(\d+\)$', '', rhyme)

    # Restore capitalization from the original. The all-caps case is
    # checked first so that 'HELLO' does not come back as 'Jello'.
    if word.isupper():
        return rhyme.upper()
    if word[:1].isupper():
        return rhyme[:1].upper() + rhyme[1:]
    return rhyme
def rhyme_each_word(line):
    '''
    Return `line` with every word replaced by the result of find_rhyme().

    A word is a maximal run of word characters and apostrophes, so
    contractions such as "don't" are looked up as a single unit.
    Punctuation and inner spacing are kept; leading and trailing
    whitespace (including the newline) is stripped from the line first.
    '''
    return re.sub(r"[\w']+", lambda m: find_rhyme(m.group(0)), line.strip())
if __name__ == '__main__':
    # Transform the input line by line. fileinput reads the files named on
    # the command line, falling back to standard input when none are given.
    for line in fileinput.input():
        print(rhyme_each_word(line))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment