Created
July 26, 2021 00:24
-
-
Save perryBunn/794d25ad8467a120179d6ebc786cfbfc to your computer and use it in GitHub Desktop.
This is related to a discussion that my girlfriend and I were having about the infinite monkey theorem. Needless to say, it devolved into us wondering what the average word length across all of Shakespeare's works is. The text file is from Project Gutenberg and can be found on their website.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def main(path='t8.shakespeare.txt'):
    """Count every whitespace-separated token in a text file and print stats.

    The file is read line by line. Lines between a line containing ``<<``
    and the next line containing ``>>`` (both marker lines included) are
    skipped -- presumably the notice blocks embedded in the Project
    Gutenberg text; confirm against the actual file.

    Tokens come from ``str.split()``, so punctuation stays attached and
    case is preserved ("The" and "the," are different entries).

    Args:
        path: Text file to analyse. Defaults to the Shakespeare corpus
            file name the script was originally written for.
    """
    # word -> [length, count]; this is the shape printStats() consumes.
    words = {}
    with open(path, 'r', encoding='utf-8') as file:
        skipping = False
        for line in file:
            # A closing marker ends the skipped region; the marker line
            # itself is never counted. Checked first so a line holding
            # both markers closes the region instead of opening one.
            if '>>' in line:
                skipping = False
                continue
            # An opening marker starts (or we are still inside) a skipped
            # region.
            if skipping or '<<' in line:
                skipping = True
                continue
            for word in line.split():
                entry = words.get(word)
                if entry is None:
                    words[word] = [len(word), 1]
                else:
                    entry[1] += 1
    printStats(words)
def printStats(dict, top_n=10):
    """Print the average word length and the most frequent words.

    Args:
        dict: Mapping of ``word -> [length, count]``. The parameter name
            shadows the builtin; it is kept unchanged so existing callers
            (including keyword callers) keep working.
        top_n: How many of the most frequent words to list. Defaults to
            10; negative values are treated as 0.

    Output goes to stdout. An empty mapping (or one whose counts sum to
    zero) reports an average length of 0.0 instead of raising
    ZeroDivisionError.
    """
    stats = dict  # local alias so the builtin name is not used below

    print("Stats:")
    # Each entry contributes length * count characters and count words.
    totalChars = sum(entry[0] * entry[1] for entry in stats.values())
    totalWords = sum(entry[1] for entry in stats.values())
    avgWordLen = totalChars / totalWords if totalWords else 0.0
    print("Avg word length:", round(avgWordLen, 4))

    # sorted() is stable, so words with equal counts keep insertion order.
    ranked = sorted(stats, key=lambda w: stats[w][1], reverse=True)
    print("Top", top_n, "occured words in Shakespeare's writing...")
    # Slice to exactly top_n entries; the previous `i > iteration` check
    # let one extra word through.
    for word in ranked[:max(top_n, 0)]:
        print("|_", word)
        print("| |_ Length:", stats[word][0])
        print("| |_ Occurence:", stats[word][1])
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment