Created
June 7, 2018 05:23
-
-
Save AnthonyBriggs/657e40de9c2d181362c1d2e43a193f6d to your computer and use it in GitHub Desktop.
Write an Shakespeare!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Write an Shakespeare!""" | |
### What are these words call'd that stands hard by?
# Load the corpus into ``words`` (a list of whitespace-separated tokens).
# If shakespeare.txt is missing, download the complete works from Project
# Gutenberg, trim the Gutenberg header/footer, cache it locally, then load it.
#
# All file access uses explicit UTF-8: the download is decoded as UTF-8, so
# writing/reading it with the platform default encoding could fail or garble
# text on systems whose default is not UTF-8.  ``with`` blocks make sure the
# file handles and the HTTP response are closed promptly.
try:
    with open('shakespeare.txt', encoding='utf-8') as corpus:
        words = corpus.read().split()
    print(len(words), "total words in shakespeare.txt")
except FileNotFoundError:
    print("No, hath not? Rosalind lacks, then, the shakespeare.txt")
    # The urllib of Python hath not made me smart
    import urllib.request
    with urllib.request.urlopen(
            'http://www.gutenberg.org/files/100/100-0.txt') as thanks_gutenberg:
        shakespeare = thanks_gutenberg.read().decode('utf-8').splitlines()
    # Or, if thou wilt, cut out my gutenblurb
    # (list.index raises ValueError if either marker line is not present.)
    start = shakespeare.index("THE SONNETS")
    finish = shakespeare.index(" FINIS")
    shakespeare = shakespeare[start:finish+1]
    with open('shakespeare.txt', 'w', encoding='utf-8') as corpus:
        corpus.writelines(s + '\n' for s in shakespeare)
    # So, now I have mine own again, be gone.
    # That I may strive to kill it with a groan.
    with open('shakespeare.txt', encoding='utf-8') as corpus:
        words = corpus.read().split()
    print(len(words), "total words in shakespeare.txt")
### If we commas have offended, string.replace(), and all is mended
def depoopify(word):
    """Lower-case ``word`` and keep only the letters a-z.

    Every other character (punctuation, digits, accented letters, ...) is
    dropped, so the result can be an empty string.
    """
    kept = []
    for ch in word.lower():
        if ch in 'abcdefghijklmnopqrstuvwxyz':
            kept.append(ch)
    return ''.join(kept)
# Collect every word that begins with 'g' once cleaned up.
# Normalise first, then filter: testing the raw token would skip capitalised
# words ("Gentle") and words with leading punctuation ("(good"), even though
# they are reported below as "words beginning with g".
g_words = [clean for clean in map(depoopify, words) if clean.startswith('g')]

unique = set  # A set by any other name would be as unique.
unique_g_words = unique(g_words)
print()
print(len(unique_g_words),
      'different words beginning with g in shakespeare.txt:')
print(sorted(unique_g_words))
### God join'd my heart and Romeo's, thou markov our hands;
# markov maps each cleaned word to the list of cleaned words that were seen
# immediately after it in the corpus, in order of appearance.
markov = {}
this_word = None
for raw_word in words:
    next_word = depoopify(raw_word)
    if this_word is not None:
        # Every word except the very first has a predecessor to be filed under.
        markov.setdefault(this_word, []).append(next_word)
    this_word = next_word

# Duplicates are kept on purpose: a follower that occurs more often appears
# more often in the list, so random.choice() picks it proportionally.
print()
print("Yon fairest...")
print(markov['fairest'])
print()
# will print:
#['creatures', 'wights', 'and', 'in', 'votary', 'lind', 'boughs',
# 'as', 'prisoner', 'lady', 'sister', 'that', 'lily', 'flowers',
# 'daughter', 'daughter', 'beauty', 'queen', 'hand', 'cordelia',
# 'of', 'shoot', 'shoot', 'goddess', 'dames', 'is', 'show', 'house',
# 'creature', 'that', 'dame', 'grant', 'cover', 'flowers', 'stars',
# 'chamber', 'creature', 'of', 'flowrs', 'youth', 'i']
### And write in thee the figures of their love
import random


def shakespeare_sentence():
    """Return one randomly generated sentence built from the markov table.

    Starts from a random corpus word (cleaned with depoopify) and repeatedly
    appends a random recorded follower of the last word, for a randomly
    chosen number of steps (random.randrange(5, 15)).  The first word is
    title-cased and a full stop is appended.

    The walk stops early if the current word has no recorded followers
    (e.g. the final word of the corpus) instead of raising KeyError, and
    tokens that cleaned down to an empty string are left out of the result
    so the sentence contains no blank "words" or doubled spaces.
    """
    first_word = depoopify(random.choice(words))
    output = [first_word]
    for _ in range(random.randrange(5, 15)):
        next_words = markov.get(output[-1])
        if not next_words:
            # Dead end: nothing ever followed this word in the corpus.
            break
        output.append(random.choice(next_words))
    # Punctuation-only / numeric tokens clean down to '' -- drop them.
    output = [word for word in output if word]
    if output:
        output[0] = output[0].title()
    return ' '.join(output) + '.'
# Print ten generated sentences, one per line.
for _ in range(10):
    sentence = shakespeare_sentence()
    print(sentence)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment