Created
November 11, 2013 05:14
-
-
Save leonardreidy/7408252 to your computer and use it in GitHub Desktop.
A simple script to stream data from Twitter. Twizzer streams data with a given set of filters, strips out the text field of each tweet, prefixes it with the tweet's creation datetime, and writes the result to stdout and to a file simultaneously. (Script is based on the work of YouTube user Sentdex, with modifications suggested by YouTube user Satish Chandra and some of my own…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# Twizzer-0-0-1.py: Simple script for pulling streaming data from Twitter using
# the credentials of a given user. You will need a developer account for this
# to work, because of the way Twitter API 1.1 handles authentication. This script is
# based on a very similar script by YouTube user SentDex, with some modifications
# suggested by YouTube user Satish Chandra, and some of my own to resolve stdout
# encoding issues and to add user interaction.
# | |
# ROADMAP | |
# The next version, Twizzer-0-0-1.py will include more user interaction code, including | |
# some basic GUI elements, and the user will be able to choose the filters used by the | |
# program. | |
# Imports/general tools | |
import time | |
# Imports/general tools/encoding stdout | |
import sys | |
import codecs | |
# Import JSON parsing tools | |
import json | |
# Imports/tweepy specific | |
from tweepy import Stream | |
from tweepy import OAuthHandler | |
from tweepy.streaming import StreamListener | |
# Credentials
# Each value may be supplied through an environment variable so that real
# secrets never have to be written into this file. When a variable is not
# set, the placeholder string is used and must be replaced by hand.
import os

consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', 'place-consumer-key-here')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', 'place-consumer-secret-here')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'place-access-token-here')
access_secret = os.environ.get('TWITTER_ACCESS_SECRET', 'place-access-secret-here')
# Route stdout through a UTF-8 writer to prevent encoding errors
# (charmap codec errors, etc.) when tweet text is printed
utf8_writer = codecs.getwriter('utf8')
sys.stdout = utf8_writer(sys.stdout)

# Ask the user what the output file should be called
filename = raw_input("\nEnter a name for the output file: ")

# Tell the user what is happening and that the search keywords are fixed.
# A parenthesised single-string print emits the same text under Python 2.
for banner_line in (
    "Please wait. Connecting and authenticating now...\n",
    "#########################################################################",
    "Please note that search keywords are hard-wired until further",
    "notice. If you want to change them, you will have to open the program",
    "file and change them there!",
    "#########################################################################\n",
):
    print(banner_line)
# Twizzer stream class | |
class listener(StreamListener):
    """Stream listener that prints each tweet and appends it to a file.

    Each tweet is reduced to its ``created_at`` and ``text`` fields joined by
    ``=>``. The resulting line is echoed to stdout and appended to
    ``<filename>.csv``, where ``filename`` is the module-level name read from
    the user before streaming starts.
    """

    def on_data(self, data):
        """Handle one raw JSON message received from the stream.

        Args:
            data: JSON text of a single stream message.

        Returns:
            True once the message has been recorded or skipped; None
            (implicitly) when handling it failed and the error was reported.
        """
        try:
            # Parse with the json library (approach suggested by Satish Chandra)
            message = json.loads(data)

            # Only messages that carry both fields can be recorded. Anything
            # else is skipped quietly instead of raising KeyError and stalling
            # the stream for the 5 second error pause below.
            if not isinstance(message, dict):
                return True
            if 'created_at' not in message or 'text' not in message:
                return True

            # Timestamp, an arbitrary separator, then the text of the tweet
            record = message['created_at'] + '=>' + message['text']
            print(record)

            # Append mode so that earlier output is never erased; the with
            # block closes the file even if a write fails.
            with open(filename + '.csv', 'a') as out_file:
                out_file.write(record.encode('utf-8'))
                out_file.write('\n')
            return True
        except Exception as e:
            # Exception rather than BaseException, so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the script.
            print('failed ondata, ' + str(e))
            time.sleep(5)

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# The meat of the script: authenticate first, then start streaming
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# Set the search terms here! As far as the author can tell, listing several
# terms, e.g. ['term1', 'term2', 'term_n'], returns the results of a Boolean OR.
track_terms = ['car']

tweet_listener = listener()
twizzer = Stream(auth, tweet_listener)
twizzer.filter(track=track_terms)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment