Created
December 17, 2010 05:00
-
-
Save jehiah/744508 to your computer and use it in GitHub Desktop.
python script to archive your tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
twitter_archiver.py, written by Jehiah Czebotar 2010 <[email protected]> http://jehiah.cz/

This uses the great 'python twitter tools' library by Mike Verdone:
http://mike.verdone.ca/twitter/

Usage:

    $ pip install twitter
    $ python twitter_archiver.py
    $ view tweets.json
"""
import sys | |
import os.path | |
import simplejson as json | |
import time | |
import logging | |
# Route DEBUG-and-above records to stdout; each line carries the time,
# process id, source file, line number and level ahead of the message.
logging.basicConfig(
    level=logging.DEBUG,
    stream=sys.stdout,
    datefmt='%H:%M:%S',
    format='%(asctime)s %(process)d %(filename)s %(lineno)d %(levelname)s #| %(message)s',
)
from twitter.api import Twitter, TwitterHTTPError | |
from twitter.oauth import OAuth, read_token_file | |
from twitter.oauth_dance import oauth_dance | |
from twitter.cmdline import CONSUMER_KEY, CONSUMER_SECRET | |
# Script-wide settings:
#   oauth_filename -- token file: $HOME (empty string when unset), the
#                     platform path separator, then '.twitter_oauth'
#   secure         -- handed to the Twitter client as its ``secure`` argument
OPTIONS = dict(
    oauth_filename=os.sep.join([os.environ.get('HOME', ''), '.twitter_oauth']),
    secure=True,
)
def save_user_timeline(twitter, outputfile, page_size=200, pause=15):
    """Page backwards through the user's timeline and dump it to a JSON file.

    Each request asks for up to ``page_size`` tweets. After every non-empty
    page the ``max_id`` bound is moved to just below the oldest tweet seen, so
    the next request continues where this one stopped. Paging ends on the
    first empty page or on the first error; whatever was collected up to that
    point is still written to ``outputfile``.

    Args:
        twitter: client object exposing ``statuses.user_timeline(**params)``,
            which must return a list of dicts that each have an ``'id'`` key.
        outputfile: path of the JSON file to create or overwrite.
        page_size: number of tweets requested per call (sent as ``count``).
        pause: seconds to sleep between two successive requests.

    Side effects:
        Prints every fetched page and a final record count. On a
        ``TwitterHTTPError`` the error details are printed and written to
        ``error.log`` in the current directory.
    """
    # docs are at http://dev.twitter.com/doc/get/statuses/user_timeline
    max_id = None
    data = []
    while True:
        kwargs = {}
        # Compare against None so that a bound of 0 is still sent.
        if max_id is not None:
            kwargs['max_id'] = max_id
        try:
            temp_data = twitter.statuses.user_timeline(
                count=page_size, trim_user='t', include_rts='t',
                include_entities='t', **kwargs)
            if not temp_data:
                # yay we reached the end
                break
            # max_id is an inclusive bound: reusing the smallest id as-is
            # would return that tweet again on every request, so the last
            # page would never come back empty. Step one below it instead.
            max_id = min(x['id'] for x in temp_data) - 1
            print(temp_data)
            data += temp_data
        except TwitterHTTPError as e:
            logging.exception('twitter error')
            body = e.e.fp.read()
            # NOTE(review): presumably raw bytes on Python 3, which
            # json.dumps cannot serialise -- decode defensively.
            if isinstance(body, bytes):
                body = body.decode('utf-8', 'replace')
            d = (e.e.code, e.uri, e.format, e.uriparts, body)
            print(d)
            with open('error.log', 'w') as f:
                f.write(json.dumps(d))
            break
        except Exception:
            # Unexpected failure: stop paging but keep what we already have.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            logging.exception('failed')
            break
        # Space the requests out to stay friendly to the API.
        time.sleep(pause)
    with open(outputfile, 'w') as f:
        f.write(json.dumps(data))
    print('saved %d records' % len(data))
def main(args=None):
    """Authorize if needed, build the API client and archive the timeline.

    Args:
        args: command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``, read at call time rather than at import time.
            If ``'authorize'`` is among them, ``oauth_dance`` is run even
            when a token file already exists.

    Tweets are saved to ``tweets.json`` in the current directory.
    """
    if args is None:
        args = sys.argv[1:]
    oauth_filename = os.path.expanduser(OPTIONS['oauth_filename'])
    if 'authorize' in args or not os.path.exists(oauth_filename):
        # Hand oauth_dance the same expanded path that is checked above and
        # read below, so all three steps refer to one file.
        oauth_dance(
            "the Command-Line Tool", CONSUMER_KEY, CONSUMER_SECRET,
            oauth_filename)
    oauth_token, oauth_token_secret = read_token_file(oauth_filename)
    twitter = Twitter(
        auth=OAuth(
            oauth_token, oauth_token_secret, CONSUMER_KEY, CONSUMER_SECRET),
        secure=OPTIONS['secure'],
        # Version 1 of the REST API has been retired; 1.1 serves the same
        # statuses/user_timeline endpoint.
        api_version='1.1',
        domain='api.twitter.com')
    save_user_timeline(twitter, outputfile='tweets.json')
# Start archiving only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this; it still works with just one minor change, setting api_version='1.1':
https://gist.github.com/hugovk/d5d57086ed8685345f54a9cc5fcf95b9/revisions