AnthonyBriggs · June 7, 2018 05:23
diff --git a/shakespeare.py b/shakespeare.py

 """Write an Shakespeare!"""

 ### What are these words call'd that stands hard by? 
 try:
     # Fast path: reuse the corpus cached by an earlier run.
     with open('shakespeare.txt') as corpus:
         words = corpus.read().split()

 except FileNotFoundError:
     print("No, hath not? Rosalind lacks, then, the shakespeare.txt")

     # The urllib of Python hath not made me smart
     # (imported here because only this first-run path needs it).
     import urllib.request
     url = 'http://www.gutenberg.org/files/100/100-0.txt'
     with urllib.request.urlopen(url) as thanks_gutenberg:
         shakespeare = thanks_gutenberg.read().decode('utf-8').splitlines()

     # Or, if thou wilt, cut out my gutenblurb: keep only the lines from
     # the "THE SONNETS" line through the "  FINIS" line, inclusive.
     # list.index() raises ValueError if either marker line is missing.
     start = shakespeare.index("THE SONNETS")
     finish = shakespeare.index("  FINIS")
     shakespeare = shakespeare[start:finish+1]
     with open('shakespeare.txt', 'w') as corpus:
         corpus.writelines([s+'\n' for s in shakespeare])

     # So, now I have mine own again, be gone.
     # The cache is read back only after the writer has been closed, so
     # everything written above is flushed before it is read.
     with open('shakespeare.txt') as corpus:
         words = corpus.read().split()

 # Reported once, whichever path produced the word list.
 print(len(words), "total words in shakespeare.txt")


 ### If we commas have offended, string.replace(), and all is mended
 def depoopify(word):
     """Lower-case ``word`` and keep only the letters a-z.

     Every other character (punctuation, digits, whitespace, accented
     letters) is dropped, so the result may be an empty string.
     """
     kept = []
     for char in word.lower():
         # For a single character this range test selects exactly the
         # 26 lower-case ASCII letters.
         if 'a' <= char <= 'z':
             kept.append(char)
     return ''.join(kept)
        
 # Normalise first, then filter: testing the raw token would skip
 # capitalised words ("Gentle") and words with leading punctuation
 # ("'gainst"), even though both begin with g once cleaned.
 g_words = [word for word in map(depoopify, words) if word.startswith('g')]
 unique = set # A set by any other name would be as unique.
 unique_g_words = unique(g_words)

 print()
 print(len(unique_g_words),
       'different words beginning with g in shakespeare.txt:')
 print(sorted(unique_g_words))


 ### God join'd my heart and Romeo's, thou markov our hands;
 # markov maps each cleaned word to the list of cleaned words that
 # directly followed it somewhere in the text.
 markov = {}
 this_word = None
 for next_word in map(depoopify, words):
     # The very first word has no predecessor, so nothing is recorded
     # until this_word has been set once.
     if this_word is not None:
         markov.setdefault(this_word, []).append(next_word)
     this_word = next_word

 # Followers are stored with their duplicates, so picking at random
 # from an entry favours the words that followed most often.
 print()
 print("Yon fairest...")
 # .get() keeps this demo from raising KeyError when the text in use
 # never has a word following 'fairest'.
 print(markov.get('fairest', []))
 print()
 # will print:
 #['creatures', 'wights', 'and', 'in', 'votary', 'lind', 'boughs', 
 # 'as', 'prisoner', 'lady', 'sister', 'that', 'lily', 'flowers', 
 # 'daughter', 'daughter', 'beauty', 'queen', 'hand', 'cordelia', 
 # 'of', 'shoot', 'shoot', 'goddess', 'dames', 'is', 'show', 'house', 
 # 'creature', 'that', 'dame', 'grant', 'cover', 'flowers', 'stars', 
 # 'chamber', 'creature', 'of', 'flowrs', 'youth', 'i']


 ### And write in thee the figures of their love
 import random

 def shakespeare_sentence():
     """Return one randomly generated sentence as a string.

     The opening word is a random entry of ``words`` cleaned with
     ``depoopify``. Each further word is chosen at random from the
     followers that ``markov`` records for the previous word. Between 5
     and 14 further words are attempted; the sentence ends early if a
     word has no recorded followers. The first word is title-cased and
     a full stop is appended.
     """
     first_word = depoopify(random.choice(words))
     output = [first_word]
     for _ in range(random.randrange(5, 15)):
         # A word with no recorded followers (for instance one that only
         # occurs as the last word of the text) is a dead end: stop the
         # sentence here instead of raising KeyError.
         next_words = markov.get(output[-1])
         if not next_words:
             break
         output.append(random.choice(next_words))
     output[0] = output[0].title()
     return ' '.join(output) + '.'

 # Print ten sample sentences.
 for _ in range(10):
     print(shakespeare_sentence())

	"""Write an Shakespeare!"""

	### What are these words call'd that stands hard by?
	try:
	    # Fast path: reuse the corpus cached by an earlier run.
	    with open('shakespeare.txt') as corpus:
	        words = corpus.read().split()

	except FileNotFoundError:
	    print("No, hath not? Rosalind lacks, then, the shakespeare.txt")

	    # The urllib of Python hath not made me smart
	    # (imported here because only this first-run path needs it).
	    import urllib.request
	    url = 'http://www.gutenberg.org/files/100/100-0.txt'
	    with urllib.request.urlopen(url) as thanks_gutenberg:
	        shakespeare = thanks_gutenberg.read().decode('utf-8').splitlines()

	    # Or, if thou wilt, cut out my gutenblurb: keep only the lines from
	    # the "THE SONNETS" line through the "  FINIS" line, inclusive.
	    # list.index() raises ValueError if either marker line is missing.
	    # NOTE(review): the end marker is written with two leading spaces,
	    # undoing what looks like whitespace collapse in this listing;
	    # confirm against the downloaded text.
	    start = shakespeare.index("THE SONNETS")
	    finish = shakespeare.index("  FINIS")
	    shakespeare = shakespeare[start:finish+1]
	    with open('shakespeare.txt', 'w') as corpus:
	        corpus.writelines([s+'\n' for s in shakespeare])

	    # So, now I have mine own again, be gone.
	    # The cache is read back only after the writer has been closed, so
	    # everything written above is flushed before it is read.
	    with open('shakespeare.txt') as corpus:
	        words = corpus.read().split()

	# Reported once, whichever path produced the word list.
	print(len(words), "total words in shakespeare.txt")


	### If we commas have offended, string.replace(), and all is mended
	def depoopify(word):
	    """Lower-case ``word`` and keep only the letters a-z.

	    Every other character (punctuation, digits, whitespace, accented
	    letters) is dropped, so the result may be an empty string.
	    """
	    kept = []
	    for char in word.lower():
	        # For a single character this range test selects exactly the
	        # 26 lower-case ASCII letters.
	        if 'a' <= char <= 'z':
	            kept.append(char)
	    return ''.join(kept)

	# Normalise first, then filter: testing the raw token would skip
	# capitalised words ("Gentle") and words with leading punctuation
	# ("'gainst"), even though both begin with g once cleaned.
	g_words = [word for word in map(depoopify, words) if word.startswith('g')]
	unique = set # A set by any other name would be as unique.
	unique_g_words = unique(g_words)

	print()
	print(len(unique_g_words),
	      'different words beginning with g in shakespeare.txt:')
	print(sorted(unique_g_words))


	### God join'd my heart and Romeo's, thou markov our hands;
	# markov maps each cleaned word to the list of cleaned words that
	# directly followed it somewhere in the text.
	markov = {}
	this_word = None
	for next_word in map(depoopify, words):
	    # The very first word has no predecessor, so nothing is recorded
	    # until this_word has been set once.
	    if this_word is not None:
	        markov.setdefault(this_word, []).append(next_word)
	    this_word = next_word

	# Followers are stored with their duplicates, so picking at random
	# from an entry favours the words that followed most often.
	print()
	print("Yon fairest...")
	# .get() keeps this demo from raising KeyError when the text in use
	# never has a word following 'fairest'.
	print(markov.get('fairest', []))
	print()
	# will print:
	#['creatures', 'wights', 'and', 'in', 'votary', 'lind', 'boughs',
	# 'as', 'prisoner', 'lady', 'sister', 'that', 'lily', 'flowers',
	# 'daughter', 'daughter', 'beauty', 'queen', 'hand', 'cordelia',
	# 'of', 'shoot', 'shoot', 'goddess', 'dames', 'is', 'show', 'house',
	# 'creature', 'that', 'dame', 'grant', 'cover', 'flowers', 'stars',
	# 'chamber', 'creature', 'of', 'flowrs', 'youth', 'i']


	### And write in thee the figures of their love
	import random

	def shakespeare_sentence():
	    """Return one randomly generated sentence as a string.

	    The opening word is a random entry of ``words`` cleaned with
	    ``depoopify``. Each further word is chosen at random from the
	    followers that ``markov`` records for the previous word. Between 5
	    and 14 further words are attempted; the sentence ends early if a
	    word has no recorded followers. The first word is title-cased and
	    a full stop is appended.
	    """
	    first_word = depoopify(random.choice(words))
	    output = [first_word]
	    for _ in range(random.randrange(5, 15)):
	        # A word with no recorded followers (for instance one that only
	        # occurs as the last word of the text) is a dead end: stop the
	        # sentence here instead of raising KeyError.
	        next_words = markov.get(output[-1])
	        if not next_words:
	            break
	        output.append(random.choice(next_words))
	    output[0] = output[0].title()
	    return ' '.join(output) + '.'

	# Print ten sample sentences.
	for _ in range(10):
	    print(shakespeare_sentence())