Last active
August 30, 2015 00:25
-
-
Save blech/65692 to your computer and use it in GitHub Desktop.
A hackish Tumblr backup tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# This script fetches all of your Tumblr posts into a single big JSON file.
# TODO: authenticate (to fetch private posts)
# TODO: fetch referenced media?
import urllib2 | |
import simplejson as json | |
# Offset of the first post to request; advanced by one page per fetch.
start = 0
# Accumulates every post fetched so far; dumped to disk at the end.
posts = []
# Tumblr subdomain of the blog to back up (<domain>.tumblr.com).
domain = "craneporn"  # TODO pull in from command line
def fetch_tumblr(start, num=50):
    """Fetch one page of posts for the module-level ``domain`` blog.

    Requests ``http://<domain>.tumblr.com/api/read/json`` and returns the
    decoded JSON payload (callers read its ``'posts'`` list).

    Args:
        start: Offset of the first post to request.
        num: Number of posts to request per page (defaults to 50, the
            value this script has always used).

    Returns:
        The object produced by ``json.loads`` on the response payload.

    Raises:
        ValueError: If no JSON object can be located in the response, or
            the located text is not valid JSON.
        urllib2.URLError: If the HTTP request itself fails.
    """
    url = 'http://%s.tumblr.com/api/read/json?callback=false&num=%s&start=%s' % (domain, num, start)
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    # The endpoint wraps the JSON object in a short prefix and suffix
    # (presumably a JSONP-style "false(...);" wrapper because of
    # callback=false -- confirm against the API docs). Rather than
    # slicing off a fixed number of characters, keep everything from the
    # first '{' to the last '}', which survives changes to the wrapper.
    begin = data.find('{')
    end = data.rfind('}')
    if begin == -1 or end < begin:
        raise ValueError("No JSON object found in Tumblr response for start=%s" % start)

    return json.loads(data[begin:end + 1])
# Page through the blog 50 posts at a time. A full page means there may be
# more posts to fetch; a short (or empty) page means the end was reached.
page_size = 50
tumblr = fetch_tumblr(start)
while len(tumblr['posts']) == page_size:
    print("Fetched %s posts" % len(tumblr['posts']))
    posts.extend(tumblr['posts'])
    start += page_size
    tumblr = fetch_tumblr(start)

# The last page fetched is the short one; it still has to be kept.
print("Fetched final %s posts" % len(tumblr['posts']))
total = start + len(tumblr['posts'])
posts.extend(tumblr['posts'])

# Write everything out as one JSON array named after the blog. The
# context manager closes the file even if serialisation or writing fails.
filename = "%s.json" % domain
with open(filename, 'w') as out:
    out.write(json.dumps(posts))
print("Wrote %s posts to '%s'" % (total, filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment