Created
May 22, 2016 11:03
-
-
Save tonybaloney/750de4b21b8630e8f16a43edde8cf1d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from os import listdir
from os.path import isfile, join, splitext

import matplotlib.pyplot as plt
import numpy as np
# Directory holding one plain-text transcript per episode.
path = 'transcripts'

# Matches "that's awesome" with at most one intensifier word in between
# ("that's really awesome").  The intensifier and its trailing space are
# optional *together*, so glued text like "that's soawesome" is not counted.
# Case-insensitive so a sentence-initial "That's awesome" is counted too.
AWESOME_RE = re.compile(r"that's (?:(\w+) )?awesome", re.IGNORECASE)

# Extra points awarded on top of the base point for each match, keyed by the
# (lower-cased) intensifier.  Sometimes Michael gets really excited..
INTENSIFIER_BONUS = {
    'pretty': 1,  # meh
    'very': 2,    # nice
    'really': 5,  # woah
}


def score_awesomeness(text):
    """Return the awesomeness rank of a transcript.

    Every "that's [word] awesome" occurrence scores one point; the
    intensifiers listed in ``INTENSIFIER_BONUS`` add their bonus on top.
    Text without any match scores 0.
    """
    # findall yields the captured intensifier, or '' when there is none.
    intensifiers = AWESOME_RE.findall(text)
    bonus = sum(INTENSIFIER_BONUS.get(word.lower(), 0) for word in intensifiers)
    return len(intensifiers) + bonus


def episode_order(episode):
    """Sort key for episodes: numeric names in numeric order, the rest after.

    Plain string sorting would put '10' before '2'; names that are not
    purely decimal digits are ordered alphabetically after the numbered ones.
    """
    number = episode['number']
    if number.isdecimal():
        return (0, int(number), number)
    return (1, 0, number)


def load_episodes(directory):
    """Read every regular file in *directory* as a UTF-8 transcript.

    Returns a list of ``{'number': <file name without extension>,
    'contents': <file text>}`` dicts sorted by ``episode_order``, because
    ``listdir`` returns entries in arbitrary order.
    """
    episodes = []
    for name in listdir(directory):
        full_path = join(directory, name)
        if not isfile(full_path):
            continue
        with open(full_path, encoding='utf8') as handle:
            episodes.append({
                # splitext strips only the extension, unlike
                # str.replace('.txt', '') which removes it anywhere.
                'number': splitext(name)[0],
                'contents': handle.read(),
            })
    episodes.sort(key=episode_order)
    return episodes


def plot_episodes(episodes):
    """Show a bar chart of the 'awesome' rank of each episode."""
    scores = [ep['awesome'] for ep in episodes]
    # One bar position per episode, however many transcripts were found.
    ind = np.arange(len(episodes))
    # align='center' is stated explicitly: the default differs between
    # matplotlib versions, and the tick labels below assume centred bars.
    plt.bar(ind, scores, color='g', align='center')
    plt.ylabel('Awesomeness')
    plt.title('Episodes by awesomeness')
    plt.xticks(ind, [ep['number'] for ep in episodes])
    # One y tick per distinct score that actually occurs.
    plt.yticks(sorted(set(scores)))
    plt.show()


def main():
    """Rank every transcript under ``path`` and plot the result."""
    episodes = load_episodes(path)
    for episode in episodes:
        episode['awesome'] = score_awesomeness(episode['contents'])
    plot_episodes(episodes)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment