-
-
Save manuelbua/08636832d018161b143a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
A script that counts word frequencies in live tweets mentioning Ferguson
'''
import json | |
import re | |
import tweepy | |
def get_api():
    '''
    Build a tweepy OAuth handler from the credentials stored in ``config``.

    The first four lines of the ``config`` file (in the current working
    directory) are read, in order, as: API key, API secret, access token
    and access token secret. Surrounding whitespace is stripped from each.

    Returns the configured ``tweepy.OAuthHandler`` instance.
    '''
    with open('config') as config_file:
        # Exactly four readline() calls, one per credential, in file order.
        credentials = [config_file.readline().strip() for _ in range(4)]
    api_key, api_secret, access_token, access_token_secret = credentials

    handler = tweepy.OAuthHandler(api_key, api_secret)
    handler.set_access_token(access_token, access_token_secret)
    return handler
class CustomStreamListener(tweepy.StreamListener):
    '''
    Stream listener that tallies word frequencies in incoming tweets.

    Attributes:
        count: number of statuses received so far.
        common: set of words loaded from the ``common`` file (one word per
            line) that are excluded from the tally.
        all_words: dict mapping each counted word to its occurrence count.
        pattern: compiled regex matching any character that is not a word
            character, an apostrophe or ``#``.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Initialise the counters and load the stop-word list from ``common``.

        All positional and keyword arguments are forwarded unchanged to
        ``tweepy.StreamListener.__init__``.
        '''
        super(CustomStreamListener, self).__init__(*args, **kwargs)
        self.count = 0
        with open('common') as f:
            self.common = set(line.strip() for line in f)
        self.all_words = {}
        # Raw string: "\w" in a plain string literal is an invalid escape
        # sequence and only reaches the regex engine by accident.
        self.pattern = re.compile(r"[^\w'#]")

    def on_status(self, status):
        '''
        Count the interesting words of one incoming status.

        The tweet text is lower-cased and split on whitespace. Tokens that
        contain ``http`` (links) or ``@`` (mentions) are discarded whole.
        The remaining tokens have punctuation replaced by spaces, and each
        resulting word is tallied in ``self.all_words`` unless it is shorter
        than three characters, purely numeric, or listed in ``self.common``.
        '''
        print('Got a tweet')
        self.count += 1
        for token in status.text.lower().split():
            # Links and mentions must be rejected BEFORE punctuation is
            # stripped: the pattern removes '@', ':' and '/', after which a
            # mention looks like an ordinary word and a URL falls apart into
            # fragments that would be counted.
            if 'http' in token or '@' in token:
                continue
            for word in self.pattern.sub(' ', token).split():
                if len(word) <= 2 or word.isdigit() or word in self.common:
                    continue
                self.all_words[word] = self.all_words.get(word, 0) + 1
if __name__ == '__main__':
    # Stream tweets that match 'Ferguson' until the stream stops, then print
    # the number of tweets seen and dump the word tally to word_data.json.
    listener = CustomStreamListener()
    try:
        auth = get_api()
        streaming_api = tweepy.Stream(auth, listener)
        streaming_api.filter(track=['Ferguson'])
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop collecting; not an error.
        pass
    finally:
        # Report in `finally` so the collected counts are also saved when the
        # stream ends or fails for a reason other than Ctrl-C; any such
        # exception still propagates after the data has been written.
        print('----TOTAL TWEETS----')
        print(listener.count)
        print('--------------------')
        json_data = json.dumps(listener.all_words, indent=4)
        with open('word_data.json', 'w') as f:
            # Same bytes as the former `print >> f, json_data`.
            f.write(json_data + '\n')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
the | |
be | |
to | |
of | |
and | |
a | |
in | |
that | |
have | |
I | |
it | |
for | |
not | |
on | |
with | |
he | |
as | |
you | |
do | |
at | |
this | |
but | |
his | |
by | |
from | |
they | |
we | |
say | |
her | |
she | |
or | |
an | |
will | |
my | |
one | |
all | |
would | |
there | |
their | |
what | |
so | |
up | |
out | |
if | |
about | |
who | |
get | |
which | |
go | |
me | |
when | |
make | |
can | |
like | |
time | |
no | |
just | |
him | |
know | |
take | |
person | |
into | |
year | |
your | |
good | |
some | |
could | |
them | |
see | |
other | |
than | |
then | |
now | |
look | |
only | |
come | |
its | |
over | |
think | |
also | |
back | |
after | |
use | |
two | |
how | |
our | |
work | |
first | |
well | |
way | |
even | |
new | |
want | |
because | |
any | |
these | |
give | |
day | |
most | |
us | |
i'll | |
i'm | |
until | |
ha | |
haha | |
hahaha | |
hahahaha | |
hahahahaha | |
hi | |
rt | |
re | |
omg | |
omgg | |
omggg | |
omgggg | |
omggggg | |
oh | |
ohh | |
ohhh | |
was | |
wtf | |
said | |
done | |
else | |
else's | |
le | |
such | |
via | |
que | |
let | |
still | |
real |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Convert the word counts in word_data.json into one large block of
text (word_block.txt) in which each word is repeated once per occurrence
'''
import json | |
# Load the {word: count} mapping from word_data.json.
# `with` guarantees the file is closed even if json.load raises.
with open('word_data.json') as f:
    data = json.load(f)

# Emit every word `count` times, each occurrence followed by one space
# (so the output keeps a trailing space). A single join avoids the
# quadratic cost of growing the string with += one occurrence at a time;
# a count of zero or less contributes nothing.
final_str = ''.join((word + ' ') * data[word] for word in data)

with open('word_block.txt', 'w') as f:
    f.write(final_str)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment