Skip to content

Instantly share code, notes, and snippets.

@paulsmith
Created August 11, 2010 04:39
Show Gist options
  • Save paulsmith/518497 to your computer and use it in GitHub Desktop.
Save paulsmith/518497 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import urllib, os, time, glob, urlparse, errno, sys, getpass, re
from xml.dom.minidom import parseString as parse
# Path of the read endpoint; make_api_url() appends it to the blog's host name.
API_PATH = '/api/read'
# Page size used by backup() when walking through a blog's posts
# (presumably the largest count the API accepts per request -- confirm).
MAX_NUM_PER_REQUEST = 50
class BadLogin(Exception):
    """Raised by read_from_api() when the API replies 'Invalid credentials.'."""
def make_api_url(tumblr):
    """Return the read-API URL for the blog whose subdomain is *tumblr*."""
    pieces = ['http://', tumblr, '.tumblr.com', API_PATH]
    return ''.join(pieces)
def read_from_api(tumblr, email, password, start=0, num=0):
    """Request one page of posts from the blog's read API and return the body.

    *start* and *num* are forwarded unchanged as the ``start`` and ``num``
    query parameters, next to the account's *email* and *password*.

    Raises BadLogin when the response body is exactly 'Invalid credentials.'.
    """
    query = urllib.urlencode({
        'email': email,
        'password': password,
        'start': start,
        'num': num
    })
    # NOTE(review): the credentials end up in the query string of a plain
    # http:// URL (see make_api_url) -- confirm this is acceptable.
    response = urllib.urlopen(make_api_url(tumblr) + '?' + query)
    try:
        body = response.read()
    finally:
        # Close the connection whether or not the read succeeded.
        response.close()
    if body == 'Invalid credentials.':
        raise BadLogin()
    return body
def get_total_num_posts(tumblr, email, password):
    """Return the blog's post count as reported by the read API.

    Issues one read_from_api() call with its default paging arguments and
    converts the ``total`` attribute of the first ``<posts>`` element of the
    response to an int.  Raises IndexError when the response contains no
    ``<posts>`` element.
    """
    xml_text = read_from_api(tumblr, email, password)
    document = parse(xml_text)
    posts_elements = document.getElementsByTagName('posts')
    first_posts = posts_elements[0]
    return int(first_posts.getAttribute('total'))
def mkdir_p(path):
    """Create directory *path* and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.  Every other failure is
    re-raised as the original OSError, including the case where *path*
    exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is also reported when a regular file occupies the path, so
        # only swallow it when a directory really is there.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
def backup(tumblr, email, password, output_dir='.', show_progress=True):
    """Save every post of a blog as XML files inside *output_dir*.

    The blog is read in pages of MAX_NUM_PER_REQUEST posts; each page is
    written to ``<tumblr>_tumblr_<first>-<last>.xml``.

    Parameters:
        tumblr: blog subdomain, as passed to read_from_api().
        email, password: account credentials forwarded to the API calls.
        output_dir: destination directory; created when it does not exist.
        show_progress: when true, write one '.' per page to stderr and a
            final newline once at least one page was written.

    Returns the total number of posts reported by the API.
    Raises BadLogin (from read_from_api) when the credentials are rejected.
    """
    # Create the destination whenever it is missing.  Relative paths such as
    # './out' or '../out' need creating as well, so the path's first
    # character must not decide this.
    if output_dir and not os.path.isdir(output_dir):
        mkdir_p(output_dir)
    page_size = MAX_NUM_PER_REQUEST
    total = get_total_num_posts(tumblr, email, password)
    start = 0
    while start < total:
        # Fetch first: a failed request must not leave an empty file behind.
        xml = read_from_api(tumblr, email, password, start, page_size)
        # The upper bound in the name is start+page_size-1 even when the
        # last page holds fewer posts; kept so file names stay stable.
        filename = '%s_tumblr_%s-%s.xml' % (tumblr, start, start + page_size - 1)
        # Binary mode stores the response exactly as it was received.
        with open(os.path.join(output_dir, filename), 'wb') as f:
            f.write(xml)
        if show_progress:
            sys.stderr.write('.')
            sys.stderr.flush()
        start += page_size
        if start < total:
            # Pause between consecutive requests; pointless after the last.
            time.sleep(2)
    if show_progress and total > 0:
        # Terminate the line of progress dots.
        sys.stderr.write('\n')
    return total
def usage():
    """Write the command-line synopsis to stderr, then exit with status 1."""
    message = 'Usage: %s http://<you>.tumblr.com [output_dir]' % sys.argv[0]
    sys.stderr.write(message + '\n')
    sys.exit(1)
# Command-line entry point: parse arguments, prompt for credentials, back up.
if __name__ == '__main__':
    args = sys.argv[1:]
    if not args:
        usage()
    # Raw string keeps the regex escapes literal.  Accept http or https, and
    # require the host to end right after '.tumblr.com' so that something
    # like 'x.tumblr.com.example.org' is rejected.
    m = re.match(r'https?://([\w-]+)\.tumblr\.com(?:/|$)', args[0])
    if not m:
        usage()
    tumblr = m.group(1)
    # Honour the output directory even when extra arguments follow it.
    output_dir = args[1] if len(args) >= 2 else '.'
    email, password = None, None
    # Keep prompting until a non-empty value is entered.
    while not email:
        email = raw_input('Email address: ')
    while not password:
        password = getpass.getpass('Password: ')
    try:
        n = backup(tumblr, email, password, output_dir)
    except BadLogin:
        sys.stderr.write('Your email address and password combination was no good. Try again.\n')
        # Report the failure to the calling shell as well.
        sys.exit(1)
    else:
        sys.stderr.write('Backed up %s tumblr posts.\n' % n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment