Created
May 2, 2014 10:23
-
-
Save thinrhino/10334d84b0c8e0c46c1e to your computer and use it in GitHub Desktop.
Read the Twitter sample stream (https://dev.twitter.com/docs/api/1.1/get/statuses/sample) and store the received tweets in MongoDB.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to populate MongoDB with tweets read from the Twitter sample stream.
import twitter | |
import time | |
import logging | |
from pymongo import MongoClient | |
# Twitter API credentials. Each value is a placeholder that must be replaced
# with a real key/token before the script is run.
OAUTH_TOKEN = '<twitter_oauth_token>'
OAUTH_TOKEN_SECRET = '<twitter_oauth_token_secret>'
CONSUMER_KEY = '<twitter_consumer_key>'
CONSUMER_SECRET = '<twitter_secret_key>'

# MongoDB connection (server address placeholder and port); tweets are
# stored in the 'twitter' database.
client = MongoClient(host='<mongodb_ip_address>', port=50000)
db = client['twitter']

# OAuth credentials object that is passed to the streaming client.
auth = twitter.OAuth(
    token=OAUTH_TOKEN,
    token_secret=OAUTH_TOKEN_SECRET,
    consumer_key=CONSUMER_KEY,
    consumer_secret=CONSUMER_SECRET,
)
# Root logger configuration: records from DEBUG upwards are written to
# push_data.log, which is truncated on every start (filemode 'w').
logging.basicConfig(
    level=logging.DEBUG,
    filemode='w',
    filename='push_data.log',
)

# Attach a second handler to the root logger so the same records are also
# emitted on the handler's default stream (stderr).
console = logging.StreamHandler()
console.setLevel(logging.DEBUG)
logging.getLogger('').addHandler(console)
# Main ingestion loop: read the Twitter sample stream and store every tweet
# that carries a 'lang' field in the 'twitter_data' collection. After each
# batch of 10000 stored tweets, log "<documents in collection>,<seconds taken
# for the batch>". Any error is logged at CRITICAL level and re-raised, which
# terminates the script.
try:
    twitter_stream = twitter.TwitterStream(auth=auth)
    iterator = twitter_stream.statuses.sample()
    collection = db['twitter_data']

    start_time = time.time()
    data_count = 0
    for tweet in iterator:
        # Skip deletion notices and any other stream message that has no
        # language tag. Membership is tested on the mapping itself rather
        # than on .keys(), which builds a list per message on Python 2.
        if 'delete' in tweet or 'lang' not in tweet:
            continue

        # insert_one replaces Collection.insert, which PyMongo 4 removed.
        collection.insert_one(tweet)
        data_count += 1

        if data_count == 10000:
            time_taken = time.time() - start_time
            # Cursor.count() was removed in PyMongo 4; the unfiltered
            # collection size is available from collection metadata.
            total_data_count = collection.estimated_document_count()
            logging.info('%s,%s\n', total_data_count, time_taken)
            # Start timing and counting the next batch.
            start_time = time.time()
            data_count = 0
except Exception as e:
    # "except Exception, e" only parses on Python 2; the "as" form works on
    # Python 2.6+ and Python 3.
    logging.critical('Error : %s', e)
    # Bare raise keeps the original traceback ("raise e" resets it on
    # Python 2).
    raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment