Skip to content

Instantly share code, notes, and snippets.

@AnthonyBriggs
Created June 7, 2018 05:23
Show Gist options
  • Save AnthonyBriggs/657e40de9c2d181362c1d2e43a193f6d to your computer and use it in GitHub Desktop.
Save AnthonyBriggs/657e40de9c2d181362c1d2e43a193f6d to your computer and use it in GitHub Desktop.
Write an Shakespeare!
"""Write an Shakespeare!"""
### What are these words call'd that stands hard by?
# Load the corpus as a flat list of whitespace-separated tokens in `words`.
# A local copy (shakespeare.txt) is used when present; otherwise the complete
# works are downloaded from Project Gutenberg, trimmed, and cached locally.
try:
    # Explicit UTF-8 so reading matches how the cache is written below,
    # regardless of the platform's default locale encoding.
    with open('shakespeare.txt', encoding='utf-8') as cached:
        words = cached.read().split()
    print(len(words), "total words in shakespeare.txt")
except FileNotFoundError:
    print("No, hath not? Rosalind lacks, then, the shakespeare.txt")
    # The urllib of Python hath not made me smart
    import urllib.request
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(
            'http://www.gutenberg.org/files/100/100-0.txt') as thanks_gutenberg:
        shakespeare = thanks_gutenberg.read().decode('utf-8').splitlines()
    # Or, if thou wilt, cut out my gutenblurb
    # (list.index raises ValueError if either marker line is absent; nothing
    # is written to disk in that case because the write happens afterwards.)
    start = shakespeare.index("THE SONNETS")
    finish = shakespeare.index(" FINIS")
    shakespeare = shakespeare[start:finish+1]
    # Write the cache as UTF-8 (the download was decoded as UTF-8) and close
    # the handle deterministically so the data is flushed before re-reading.
    with open('shakespeare.txt', 'w', encoding='utf-8') as cache:
        cache.writelines(s + '\n' for s in shakespeare)
    # So, now I have mine own again, be gone.
    # That I may strive to kill it with a groan.
    with open('shakespeare.txt', encoding='utf-8') as cached:
        words = cached.read().split()
    print(len(words), "total words in shakespeare.txt")
### If we commas have offended, string.replace(), and all is mended
def depoopify(word):
    """Return *word* lower-cased with everything but the letters a-z removed.

    Punctuation, digits, whitespace and any non-ASCII characters are dropped,
    so a token containing no ASCII letters yields the empty string.
    """
    kept = []
    for letter in word.lower():
        if letter in 'abcdefghijklmnopqrstuvwxyz':
            kept.append(letter)
    return ''.join(kept)
# Collect every word that begins with 'g'. Each token is cleaned first and
# the test is applied to the cleaned form: checking the raw token would miss
# capitalised words ("Good") and ones with leading punctuation ("'gainst").
g_words = [clean for clean in map(depoopify, words) if clean.startswith('g')]
unique = set  # A set by any other name would be as unique.
unique_g_words = unique(g_words)
print()
print(len(unique_g_words),
      'different words beginning with g in shakespeare.txt:')
print(sorted(unique_g_words))
### God join'd my heart and Romeo's, thou markov our hands;
# Build the word-transition table: markov[w] is the list of cleaned words
# that directly follow the cleaned word w anywhere in the text.
# Duplicates are kept deliberately, so a follower that occurs more often
# appears more often in the list and random.choice() on it is weighted
# by frequency. Tokens that clean to '' are recorded like any other word.
markov = {}
this_word = None
for next_word in map(depoopify, words):
    if this_word is not None:
        # The very first word has no predecessor, so only later words
        # are recorded; setdefault creates the list on first sight.
        markov.setdefault(this_word, []).append(next_word)
    this_word = next_word
print()
print("Yon fairest...")
print(markov['fairest'])
print()
# will print:
#['creatures', 'wights', 'and', 'in', 'votary', 'lind', 'boughs',
# 'as', 'prisoner', 'lady', 'sister', 'that', 'lily', 'flowers',
# 'daughter', 'daughter', 'beauty', 'queen', 'hand', 'cordelia',
# 'of', 'shoot', 'shoot', 'goddess', 'dames', 'is', 'show', 'house',
# 'creature', 'that', 'dame', 'grant', 'cover', 'flowers', 'stars',
# 'chamber', 'creature', 'of', 'flowrs', 'youth', 'i']
### And write in thee the figures of their love
import random
def shakespeare_sentence():
    """Generate one random pseudo-Shakespearean sentence.

    Starts from a random token of the module-level ``words`` list (cleaned
    with ``depoopify``) and walks the module-level ``markov`` table for a
    random number of steps (5 to 14), choosing each next word at random
    from the recorded followers of the current one.

    Returns:
        The generated words joined by single spaces, first word
        capitalised, terminated by a full stop.

    Raises:
        IndexError: if ``words`` is empty (from ``random.choice``).
    """
    current = depoopify(random.choice(words))
    # A token with no letters cleans to ''; walk on from it but never emit
    # it, otherwise the sentence gets a leading or doubled space.
    output = [current] if current else []
    for _ in range(random.randrange(5, 15)):
        followers = markov.get(current)
        if not followers:
            # Dead end: this word was never followed by anything (e.g. the
            # last word of the text). End the sentence early instead of
            # raising KeyError.
            break
        current = random.choice(followers)
        if current:
            output.append(current)
    if output:
        output[0] = output[0].title()
    return ' '.join(output) + '.'
# Recite ten freshly generated sentences, one per line.
for _ in range(10):
    sentence = shakespeare_sentence()
    print(sentence)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment