Last active
September 25, 2016 20:25
-
-
Save nhatbui/d4993b209d0bd699aa84809928fb40bf to your computer and use it in GitHub Desktop.
Get all historical Tweets from one or more users, paging backwards in time from their most recent Tweet.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import collections
import datetime
import time

from TwitterAPI import (
    TwitterAPI,
    TwitterConnectionError,
    TwitterOAuth,
    TwitterRequestError,
)
twitter_handles = ['Twitter']
api_endpoint = 'statuses/user_timeline'

# Page size requested per call.
PAGE_SIZE = 200
# How many times a failed request is retried before it is dropped.
MAX_RETRIES = 5
# Upper bound, in seconds, for the exponential back-off between retries.
MAX_BACKOFF_SECONDS = 60


def build_seed_set(handles, endpoint=api_endpoint, count=PAGE_SIZE):
    """Return the initial request queue: one ``(endpoint, params)`` per handle."""
    return [
        (endpoint, {'screen_name': handle, 'count': count})
        for handle in handles
    ]


# The seed_set is a queue containing API requests to be performed.
seed_set = build_seed_set(twitter_handles)


def is_retweet(tweet):
    """Return True when *tweet* carries a ``retweeted_status`` key."""
    return 'retweeted_status' in tweet


def process_tweet(tweet):
    """Default per-tweet hook. DO STUFF HERE."""
    print(tweet)


def next_page_params(params, tweets):
    """Return request params for the page older than *tweets*.

    Every tweet on the page, retweets included, takes part in finding the
    oldest id; otherwise a page that ends in retweets would be fetched again.
    ``max_id`` is inclusive on the API side, hence the ``- 1``.

    Returns None when *tweets* is empty, i.e. there is nothing older to fetch.
    *params* itself is never modified.
    """
    ids = [tweet['id'] for tweet in tweets]
    if not ids:
        return None
    return dict(params, max_id=min(ids) - 1)


def wait_for_quota(quota):
    """Sleep until the rate-limit window resets, if the quota is exhausted.

    *quota* is the dict returned by ``response.get_rest_quota()``. Its values
    may be None; in that case we are optimistic and go on without waiting.
    NOTE(review): ``reset`` is compared against ``datetime.datetime.now()``,
    as the original script did, so it is assumed to be a naive local
    datetime -- confirm against the installed TwitterAPI version.

    Returns the number of seconds slept (0.0 when no wait was needed).
    """
    reset = quota.get('reset')
    if quota.get('remaining') != 0 or reset is None:
        return 0.0

    print('Waiting for new interval. Resume at {}'.format(reset))
    delay = max((reset - datetime.datetime.now()).total_seconds(), 0.0)
    if delay:
        # Sleep instead of spinning so the wait does not burn a CPU core.
        time.sleep(delay)
    print('Resuming')
    return delay


def crawl_timelines(api, requests, handle_tweet=None, max_retries=MAX_RETRIES):
    """Run every queued request and keep paging until a timeline is exhausted.

    Args:
        api: object exposing ``request(endpoint, params)``; the returned
            response must be iterable and provide ``get_rest_quota()``.
        requests: iterable of ``(endpoint, params)`` pairs to start from.
        handle_tweet: callable invoked with each tweet that is not a retweet;
            defaults to ``process_tweet``.
        max_retries: how many times a failing request is re-queued before it
            is dropped.

    Returns:
        The number of tweets passed to *handle_tweet*.
    """
    if handle_tweet is None:
        handle_tweet = process_tweet

    # Each queue entry also records how often that request has failed so far.
    queue = collections.deque(
        (endpoint, params, 0) for endpoint, params in requests
    )
    handled = 0

    while queue:
        endpoint, params, attempts = queue.popleft()
        try:
            response = api.request(endpoint, params)
            # Read the whole page before processing it, so an error surfacing
            # while reading cannot leave part of the page already handled
            # (a retry would then handle those tweets twice).
            tweets = list(response)
        except (TwitterRequestError, TwitterConnectionError) as error:
            if attempts >= max_retries:
                print('Giving up on {} {} after {} attempts: {}'.format(
                    endpoint, params, attempts + 1, error))
                continue
            # Back off before retrying rather than hammering the API.
            time.sleep(min(2 ** attempts, MAX_BACKOFF_SECONDS))
            queue.append((endpoint, params, attempts + 1))
            continue

        for tweet in tweets:
            # ignore retweets
            if not is_retweet(tweet):
                handle_tweet(tweet)
                handled += 1

        # A non-empty page may have older tweets behind it, however many of
        # its entries were retweets; only an empty page ends the crawl.
        older = next_page_params(params, tweets)
        if older is not None:
            queue.append((endpoint, older, 0))

        wait_for_quota(response.get_rest_quota())

    return handled


def main():
    """Parse the command line, authenticate and crawl ``twitter_handles``."""
    parser = argparse.ArgumentParser(
        description='Get all historical Tweets from user(s) starting from now.'
    )
    parser.add_argument(
        'key_file',
        help='path to the text file holding the Twitter OAuth credentials',
    )
    args = parser.parse_args()

    # Initialize TwitterAPI with credentials
    oauth = TwitterOAuth.read_file(args.key_file)
    api = TwitterAPI(
        oauth.consumer_key,
        oauth.consumer_secret,
        oauth.access_token_key,
        oauth.access_token_secret
    )

    # Send off these requests!
    crawl_timelines(api, seed_set)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment