Skip to content

Instantly share code, notes, and snippets.

@yssk22
Created August 15, 2010 13:18
Show Gist options
  • Save yssk22/525487 to your computer and use it in GitHub Desktop.
Save yssk22/525487 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import sys
import json
import logging
import getopt
import couchdbkit
import json
import locale
import time
from tweepy import OAuthHandler, API, Cursor
from tweepy.error import TweepError
from urlparse import urlparse
from tweethandler import TweetHandler
# Name handed to logging.getLogger() for the logger this script uses.
LOG_NAME = 'tweetbackup'
# NOTE(review): not referenced anywhere in the visible code; initLogger
# hard-codes logging.DEBUG for the logger instead -- confirm whether this
# constant is still needed.
LOG_LEVEL = logging.DEBUG
# Record layout and timestamp layout passed to logging.Formatter.
LOG_FORMAT = "[%(asctime)s] (%(levelname)s) - %(message)s *%(name)s*"
LOG_DATEFORMAT = "%H:%M"
# Passed as ``count`` to the user_timeline cursor when paging the timeline.
PER_PAGE = 200
class Server(couchdbkit.Server):
    """``couchdbkit.Server`` extended with an ``add_authorization`` method."""

    def add_authorization(self, obj_auth):
        """Attach ``obj_auth`` as a filter on this server's ``res`` object.

        Monkey patch for ``add_authorization``.
        """
        resource = self.res
        resource.add_filter(obj_auth)
def db_from_url(url):
    """Return the database object addressed by ``url``.

    The scheme and network location of ``url`` select the server; the URL
    path with its first character (the leading "/") dropped is used as the
    database name looked up on that server.
    """
    parts = urlparse(url)
    server_uri = "%s://%s" % (parts.scheme, parts.netloc)
    database_name = parts.path[1:]
    return Server(server_uri)[database_name]
def load_rc():
    """Load the account and logger settings from the user's home directory.

    Reads ``.tweetbackuprc`` and ``.tweetloggerrc`` from the home directory
    and parses each of them as JSON.

    Returns:
        A ``(account_config, logger_config)`` tuple holding the two parsed
        documents, in that order.

    Raises:
        SystemExit: with status 1 (after writing a message to stderr) when
            either rc file is missing.
        ValueError: when an rc file does not contain valid JSON.
    """
    # $HOME is still preferred, but fall back to the platform lookup instead
    # of dying with a bare KeyError when the variable is not set.
    homedir = os.environ.get('HOME') or os.path.expanduser('~')
    rc_paths = (os.path.join(homedir, ".tweetbackuprc"),
                os.path.join(homedir, ".tweetloggerrc"))
    if not all(os.path.isfile(path) for path in rc_paths):
        sys.stderr.write("rcfile does not exist!\n")
        sys.exit(1)
    configs = []
    for path in rc_paths:
        # Close each file deterministically rather than leaking the handle.
        with open(path) as rc_file:
            configs.append(json.load(rc_file))
    return tuple(configs)
def initLogger(logger_config, loglevel):
    """Build the script's logger with a ``TweetHandler`` attached.

    Args:
        logger_config: mapping whose ``'oauth'`` entry provides the
            ``consumer_key``, ``consumer_secret``, ``access_key`` and
            ``access_secret`` values given to ``TweetHandler``.
        loglevel: level applied to the handler.

    Returns:
        The logger named ``LOG_NAME``. The logger itself is set to DEBUG, so
        the handler's ``loglevel`` is what decides which records it handles.
    """
    oauth = logger_config['oauth']
    tweet_handler = TweetHandler(oauth['consumer_key'],
                                 oauth['consumer_secret'],
                                 oauth['access_key'],
                                 oauth['access_secret'])
    tweet_handler.setLevel(loglevel)
    tweet_handler.setFormatter(logging.Formatter(LOG_FORMAT, LOG_DATEFORMAT))

    log = logging.getLogger(LOG_NAME)
    log.setLevel(logging.DEBUG)
    log.addHandler(tweet_handler)
    return log
def getClient(account_config):
    """Return a tweepy ``API`` object authenticated from ``account_config``.

    ``account_config['oauth']`` must provide ``consumer_key``,
    ``consumer_secret``, ``access_key`` and ``access_secret``.
    """
    oauth = account_config['oauth']
    credentials = OAuthHandler(oauth['consumer_key'], oauth['consumer_secret'])
    credentials.set_access_token(oauth['access_key'], oauth['access_secret'])
    return API(credentials)
def get_since_id(db):
    """Return the tweet id stored in the first row of the id view, if any.

    Queries the ``mytweets/by_tweet_id`` view in descending order, limited
    to one row (presumably the newest stored tweet -- depends on how the
    view is defined).

    Returns:
        ``int`` of that row's ``value.id``, or ``None`` when the view yields
        no row.
    """
    newest = db.view("mytweets/by_tweet_id", descending=True, limit=1).first()
    if not newest:
        return None
    return int(newest["value"]["id"])
def to_date_string(t):
    """Format ``t`` like ``"Sun Aug 15 13:18:00 +0000 2010"``.

    Day and month names are always produced in the ``C`` locale so the
    result does not depend on the user's language settings.

    Args:
        t: an object with ``strftime`` (a ``datetime``). The ``+0000``
            offset is a literal in the format string, so ``t`` is assumed
            to already be in UTC -- TODO confirm against the caller.

    Returns:
        The formatted string.
    """
    # Remember the active setting so it can be put back exactly; forcing ''
    # afterwards would silently switch a process that was running in the
    # default "C" locale over to the environment's locale.
    previous = locale.setlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, 'C')
    try:
        return t.strftime('%a %b %d %H:%M:%S +0000 %Y')
    finally:
        # Restore even when strftime raises.
        locale.setlocale(locale.LC_TIME, previous)
def to_dict(status):
    """Turn a status object into a dictionary tagged as a tweet document.

    Starts from ``status.__getstate__()``, replaces the ``user`` and
    ``author`` entries with the ``__getstate__()`` of the matching
    attributes, rewrites every ``created_at`` (top level, user, author) with
    ``to_date_string`` and sets ``type`` to ``"tweet"``.
    """
    record = status.__getstate__()
    record['created_at'] = to_date_string(record['created_at'])
    for role in ('user', 'author'):
        profile = getattr(status, role).__getstate__()
        profile['created_at'] = to_date_string(profile['created_at'])
        record[role] = profile
    record['type'] = "tweet"
    return record
def collect_timeline(client, user, since_id):
    """Collect ``user``'s timeline as a list of dictionaries.

    Pages through ``client.user_timeline`` with a ``Cursor`` (``PER_PAGE``
    statuses requested per page, restricted by ``since_id``), logs the size
    and last id of every page at debug level through the module-global
    ``logger`` and converts each status with ``to_dict``.

    NOTE(review): ``batch[-1]`` raises IndexError should the cursor ever
    yield an empty page.

    Returns:
        A list with one dictionary per status, in the order received.
    """
    pages = Cursor(client.user_timeline,
                   id=user,
                   since_id=since_id,
                   count=PER_PAGE).pages()
    collected = []
    for batch in pages:
        logger.debug("process page %s" % len(batch))
        logger.debug("last id: %s" % batch[-1].id)
        collected.extend(to_dict(entry) for entry in batch)
    return collected
def main():
    """Command line entry point: back up a user's new tweets to a database.

    Options:
        -d URL    URL of the target database (required)
        -u USER   user whose timeline is collected (required)
        -v        attach the log handler at DEBUG instead of INFO

    Exit status: 2 for an invalid or missing option, 1 when the rc files
    are missing (via ``load_rc``) or saving fails, 0 otherwise.
    """
    global logger  # collect_timeline logs through this module-level name
    account_config, logger_config = load_rc()
    try:
        # "v" must be declared in the option string; without it getopt
        # rejects -v and the verbose branch below can never run.
        opts, _args = getopt.getopt(sys.argv[1:], "d:u:v")
    except getopt.GetoptError:
        sys.stderr.write("Invalid option\n")
        sys.exit(2)

    db = None
    user = None
    loglevel = logging.INFO
    for o, a in opts:
        if o == "-d":
            db = a
        elif o == "-u":
            user = a
        elif o == "-v":
            loglevel = logging.DEBUG
    if db is None:
        sys.stderr.write("No database specified!\n")
        sys.exit(2)
    if user is None:
        sys.stderr.write("No user specified!\n")
        sys.exit(2)

    logger = initLogger(logger_config, loglevel)
    db = db_from_url(db)
    client = getClient(account_config)
    since_id = get_since_id(db)

    tls = None
    try:
        tls = collect_timeline(client, user, since_id)
    except TweepError as e:
        # NOTE(review): after logging, execution falls through to the
        # "No tweets found" branch and exits 0 -- confirm that a failed
        # fetch is really meant to be reported as success.
        logger.exception(e)
    if not tls:
        logger.info("No tweets found. (user=%s, since=%s)" %
                    (user, since_id))
        sys.exit(0)

    try:
        db.bulk_save(tls)
        logger.info("%s tweets backuped. (user=%s, since=%s)" %
                    (len(tls), user, since_id))
    except Exception as e:
        # Top-level boundary: record whatever went wrong and signal failure.
        logger.exception(e)
        sys.exit(1)
# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment