Skip to content

Instantly share code, notes, and snippets.

@om-henners
Last active August 15, 2016 22:03
Show Gist options
  • Save om-henners/442da9129e8cab6b583a22827e60ae2c to your computer and use it in GitHub Desktop.
Save om-henners/442da9129e8cab6b583a22827e60ae2c to your computer and use it in GitHub Desktop.
Download pyconau tweets with Twython. Because of Twitter's rate limiting, this will take a while to execute. Also, the output files wind up pretty large (~300 MB per rate-limited block).
# coding: utf-8
from __future__ import print_function
from datetime import date, timedelta, datetime
import json
from time import sleep
from twython import Twython, TwythonRateLimitError
# Download recent #pyconau / @pyconau tweets through the Twitter search API and
# store them one JSON document per line in pyconau_2016_tweets.json.

# Application credentials; replace the placeholders with real values.
APP_KEY = 'YOUR_APP_KEY'
APP_SECRET = 'YOUR_APP_SECRET'

# Exchange the key/secret pair for an OAuth 2 access token, then rebuild the
# client so that it authenticates with that token.
twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
ACCESS_TOKEN = twitter.obtain_access_token()
twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)

# technically, the max is 7 days, but 8 just to quickly get around timezone shenanigans
since = date.today() - timedelta(days=8)
querystring = '#pyconau OR @pyconau since:{:%Y-%m-%d}'.format(since)
querystring_user = 'from:pyconau since:{:%Y-%m-%d}'.format(since)

# rate limit is 180 requests every 15 minutes, so wait 16 minutes between
# rate-limited passes to be on the safe side.
RATE_LIMIT_SLEEP_SECONDS = 60 * 16

with open('pyconau_2016_tweets.json', 'wb') as f:
    tweet_ids = set()  # avoid adding duplicate tweets if possible
    for qs in [querystring, querystring_user]:
        max_id = None  # None on the first pass: start from the newest tweet
        while True:
            try:
                cursor = twitter.cursor(twitter.search, q=qs, count=100, max_id=max_id)
                for result in cursor:
                    # max_id is inclusive, so resume strictly below the tweet
                    # just seen (search results are expected newest-first).
                    # Advance even for duplicates so a resumed pass never
                    # re-reads tweets that were already examined.
                    max_id = result['id'] - 1
                    if result['id'] in tweet_ids:
                        continue
                    # The file is opened in binary mode, so write bytes; this
                    # works on both Python 2 and Python 3.
                    f.write((json.dumps(result) + '\n').encode('utf-8'))
                    tweet_ids.add(result['id'])
            except TwythonRateLimitError:
                # Being rate limited does not mean the search is exhausted:
                # report progress, wait for the window to reset, then resume
                # from max_id.
                print(max_id)
                sleep(RATE_LIMIT_SLEEP_SECONDS)
            else:
                # The cursor ran to completion without being rate limited, so
                # the search has no more tweets for this query.
                break
        print("Done", qs, datetime.now().isoformat())
print("Done @pyconau tweets", datetime.now().isoformat())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment