Skip to content

Instantly share code, notes, and snippets.

@manuelbua
Forked from howdydoody123/analyze.py
Created August 22, 2014 12:56
Show Gist options
  • Save manuelbua/08636832d018161b143a to your computer and use it in GitHub Desktop.
Save manuelbua/08636832d018161b143a to your computer and use it in GitHub Desktop.
'''
A script for analyzing twitter stats on Ferguson
'''
import json
import re
import tweepy
def get_api():
    '''
    Build the tweepy OAuth handler from the credentials in ``config``.

    The ``config`` file in the working directory is read as four
    consecutive lines: API key, API secret, access token and access
    token secret. Surrounding whitespace on each line is stripped.

    Returns the ``tweepy.OAuthHandler`` with the access token set on it.
    '''
    with open('config') as config_file:
        # readline() yields '' past end-of-file, so a short file produces
        # empty credentials rather than an exception.
        credentials = [config_file.readline().strip() for _ in range(4)]
    consumer_key, consumer_secret, token, token_secret = credentials
    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(token, token_secret)
    return handler
class CustomStreamListener(tweepy.StreamListener):
    '''
    Sub class of StreamListener that tallies the words seen in
    streamed tweets.

    Attributes:
        count: number of statuses received so far.
        common: set of words loaded from the ``common`` file (one per
            line); these words are never counted.
        all_words: dict mapping each counted word to its number of
            occurrences.
        pattern: compiled regex matching every character that is not a
            word character, an apostrophe or ``#``.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Initialise the counters and load the ``common`` word list.

        All arguments are forwarded unchanged to the base class.
        '''
        super(CustomStreamListener, self).__init__(*args, **kwargs)
        self.count = 0
        with open('common') as f:
            self.common = set(line.strip() for line in f)
        self.all_words = {}
        # Raw string so the regex escape \w is not treated as a string escape.
        self.pattern = re.compile(r"[^\w'#]")

    def on_status(self, status):
        '''
        Count the interesting words of one incoming status.

        The text of ``status`` is lower-cased and split into words. Links,
        @-mentions, words of two characters or fewer, purely numeric
        words and words listed in ``self.common`` are skipped; every
        other word has its entry in ``self.all_words`` incremented.
        '''
        print('Got a tweet')
        self.count += 1
        # Links and mentions must be dropped while tokens are still
        # whitespace-delimited: once punctuation is replaced by spaces the
        # "@" is gone and a URL has been broken into separate fragments,
        # so neither can be recognised any more.
        kept = [token for token in status.text.lower().split()
                if 'http' not in token and '@' not in token]
        for word in self.pattern.sub(' ', ' '.join(kept)).split():
            # split() never yields empty or whitespace-only strings, so
            # only length, digits and the common-word list need checking.
            if len(word) <= 2 or word.isdigit() or word in self.common:
                continue
            self.all_words[word] = self.all_words.get(word, 0) + 1
if __name__ == '__main__':
    # Stream tweets matching "Ferguson" until the user interrupts with
    # Ctrl-C, then print the tweet total and dump the word counts to
    # word_data.json.
    l = CustomStreamListener()
    try:
        auth = get_api()
        streaming_api = tweepy.Stream(auth, l)
        streaming_api.filter(track=['Ferguson'])
    except KeyboardInterrupt:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('----TOTAL TWEETS----')
        print(l.count)
        print('--------------------')
        json_data = json.dumps(l.all_words, indent=4)
        with open('word_data.json', 'w') as f:
            # Trailing newline kept to match the former `print >> f` output.
            f.write(json_data + '\n')
the
be
to
of
and
a
in
that
have
I
it
for
not
on
with
he
as
you
do
at
this
but
his
by
from
they
we
say
her
she
or
an
will
my
one
all
would
there
their
what
so
up
out
if
about
who
get
which
go
me
when
make
can
like
time
no
just
him
know
take
person
into
year
your
good
some
could
them
see
other
than
then
now
look
only
come
its
over
think
also
back
after
use
two
how
our
work
first
well
way
even
new
want
because
any
these
give
day
most
us
i'll
i'm
until
ha
haha
hahaha
hahahaha
hahahahaha
hi
rt
re
omg
omgg
omggg
omgggg
omggggg
oh
ohh
ohhh
was
wtf
said
done
else
else's
le
such
via
que
let
still
real
'''
Convert the JSON data into a
large block of text for parsing
'''
import json
# Load the word -> count mapping from word_data.json. The context manager
# closes the file even if parsing fails.
with open('word_data.json') as f:
    data = json.load(f)

# Emit each word, followed by a space, once per counted occurrence.
# Joining the pieces avoids rebuilding the whole string on every append.
# NOTE(review): assumes every count is an integer — confirm against the
# producer of word_data.json.
final_str = ''.join((word + ' ') * count for word, count in data.items())

with open('word_block.txt', 'w') as f:
    f.write(final_str)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment