Skip to content

Instantly share code, notes, and snippets.

@yuribossa
Created February 27, 2010 11:01
Show Gist options
  • Save yuribossa/316633 to your computer and use it in GitHub Desktop.
Save yuribossa/316633 to your computer and use it in GitHub Desktop.
Tumblr Photo backup on Google App Engine.
# -*- coding: utf8 -*-
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.api import urlfetch
from google.appengine.ext import db
from google.appengine.api import mail
from google.appengine.api.labs import taskqueue
from BeautifulSoup import BeautifulStoneSoup
import string
import random
import re
# Deployment settings -- all three are empty here and must be filled in.
# Tumblr blog name; used as the subdomain in
# 'http://<tumblr_account>.tumblr.com/api/read'.
tumblr_account = ''
# Address passed as ``sender`` to mail.send_mail.
from_mailaddress = ''
# Address passed as ``to`` to mail.send_mail; receives one mail per photo.
to_mailaddress = ''
class TumblrPhoto(db.Model):
    """Datastore record tracking one Tumblr photo through the backup pipeline.

    Entities are created by CheckHandler with the Tumblr post id as the
    key name, picked up by CronHandler, downloaded by FetchHandler and
    finally mailed by MailHandler.
    """

    # Source URL of the image, as reported by the Tumblr read API.
    url = db.StringProperty()
    # True once CronHandler has enqueued a fetch task for this photo.
    # Defaults to False so that an entity created without the flag is still
    # matched by the cron query ``queued = False`` (None would not match).
    queued = db.BooleanProperty(default=False)
    # True once MailHandler has mailed the image.
    mailsent = db.BooleanProperty(default=False)
    # Raw image bytes; set by FetchHandler and reset to None after mailing.
    content = db.BlobProperty()
class MailHandler(webapp.RequestHandler):
    """Task handler that e-mails one stored photo as an attachment."""

    def get(self, id=''):
        """Mail the photo stored under key name ``id``, then drop its bytes.

        Args:
            id: Key name of the TumblrPhoto entity (captured from the URL).

        Does nothing when ``id`` is empty, when no such entity exists, when
        the photo was already mailed, or when it has no downloaded content.
        """
        if not id:
            return
        photo = TumblrPhoto.get_by_key_name(id)
        if photo is None:
            return
        # This handler may run more than once for the same photo (for
        # example when a task is retried).  After a successful send
        # ``content`` is None, so without this guard a repeated run would
        # try to attach None and could send the same photo twice.
        if photo.mailsent or photo.content is None:
            return

        # The photo URL doubles as attachment name, subject and body.
        filename = photo.url or ''
        if not re.search(r'^.*\.jpe?g$', filename):
            # Not a .jpg/.jpeg name: substitute a random 20-character
            # alphanumeric name with a .jpg extension.
            alphabets = string.digits + string.letters
            filename = ''.join(
                random.choice(alphabets) for _ in xrange(20)) + '.jpg'

        mail.send_mail(
            sender=from_mailaddress,
            to=to_mailaddress,
            subject=filename,
            body=filename,
            attachments=[(filename, photo.content)]
        )

        # The image has been delivered; free the blob and record the send.
        photo.content = None
        photo.mailsent = True
        photo.put()
class FetchHandler(webapp.RequestHandler):
    """Task handler that downloads one photo and stores its bytes."""

    def get(self, id=''):
        """Download the photo stored under key name ``id`` and queue its mail.

        Args:
            id: Key name of the TumblrPhoto entity (captured from the URL).

        Does nothing when ``id`` is empty, when no such entity exists, or
        when the photo has already been mailed.  Responds with HTTP 500
        when the download does not return status 200.
        """
        if not id:
            return
        photo = TumblrPhoto.get_by_key_name(id)
        if photo is None:
            return
        if photo.mailsent:
            # Already delivered; fetching again would only lead to a
            # duplicate mail.
            return

        result = urlfetch.fetch(photo.url)
        if result.status_code != 200:
            # Signal failure instead of immediately re-enqueueing this same
            # URL: a non-2xx response makes the task queue retry the task
            # with backoff, whereas an unconditional re-add spins in a tight
            # loop when the image is permanently unavailable.
            self.error(500)
            return

        photo.content = result.content
        photo.put()
        taskqueue.add(url='/tumblr/mail/' + id, method='GET')
class CronHandler(webapp.RequestHandler):
    """Entry point that hands not-yet-queued photos over to fetch tasks."""

    def get(self):
        """Enqueue a fetch task for up to 10 unqueued photos.

        Each selected entity is flagged ``queued = True`` and saved right
        after its task has been added.
        """
        pending = db.Query(TumblrPhoto).filter('queued =', False)
        for entry in pending.fetch(10):
            key_name = entry.key().name()
            taskqueue.add(url='/tumblr/fetch/' + key_name, method='GET')
            entry.queued = True
            entry.put()
class CheckHandler(webapp.RequestHandler):
    """Task handler that records one page (50 posts) of the blog's photos."""

    def get(self, start='', end=''):
        """Store unseen photo posts from offset ``start`` and chain the next page.

        Args:
            start: Offset of the first post to read, as a decimal string.
            end: Total number of posts, as a decimal string; paging stops
                once ``start + 50`` reaches it.

        Does nothing when either argument is empty or not a plain
        non-negative integer.  Responds with HTTP 500 when the Tumblr API
        does not return status 200.
        """
        if not start or not end:
            return
        # Both values come straight from the request path.  Reject anything
        # that int() below could not parse before doing any work.
        if not (start.isdigit() and end.isdigit()):
            return

        url = ('http://' + tumblr_account +
               '.tumblr.com/api/read?num=50&type=photo&start=' + start)
        result = urlfetch.fetch(url)
        if result.status_code != 200:
            # A non-2xx response lets the task queue retry this page with
            # backoff, rather than re-adding the same task immediately.
            self.error(500)
            return

        soup = BeautifulStoneSoup(result.content)
        for post in soup('post'):
            # Look the URL up inside its own post.  Pairing two flat,
            # document-wide lists of posts and photo-urls goes out of step
            # as soon as one post has zero or several 1280px photo-url tags.
            photo_url = post.find('photo-url', {'max-width': '1280'})
            if photo_url is None or not photo_url.string:
                continue
            post_id = post['id']
            self.response.out.write(
                post_id + ',' + photo_url.string + '<br>')
            if TumblrPhoto.get_by_key_name(post_id) is None:
                # `+ ''` turns the parser's string node into a plain string
                # before it is handed to the datastore property.
                TumblrPhoto(key_name=post_id,
                            url=photo_url.string + '',
                            queued=False,
                            mailsent=False).put()

        next_start = int(start) + 50
        if next_start < int(end):
            taskqueue.add(
                url='/tumblr/check/' + str(next_start) + '/' + end,
                method='GET')
class StartHandler(webapp.RequestHandler):
    """Kick-off handler: reads the photo-post count and starts paging."""

    def get(self):
        """Queue the first check task covering offsets 0..total.

        Writes 'Task start.' on success.  Writes 'error' when the Tumblr
        API request fails, returns a non-200 status, or the response has no
        ``<posts total="...">`` element.
        """
        url = 'http://' + tumblr_account + '.tumblr.com/api/read?type=photo'
        try:
            result = urlfetch.fetch(url)
        except urlfetch.Error:
            # Network or URL problems are reported the same way as a bad
            # status code instead of surfacing as an unhandled exception.
            self.response.out.write('error')
            return
        if result.status_code != 200:
            self.response.out.write('error')
            return

        posts = BeautifulStoneSoup(result.content).find('posts')
        total = None
        if posts is not None:
            total = posts.get('total')
        if not total:
            # Unexpected payload: nothing to page through.
            self.response.out.write('error')
            return

        taskqueue.add(url='/tumblr/check/0/' + total, method='GET')
        self.response.out.write('Task start.')
# URL pattern -> handler table.  The captured groups line up with the extra
# parameters of each handler's get() method.
_ROUTES = [
    ('/tumblr/mail/(.+)', MailHandler),
    ('/tumblr/fetch/(.+)', FetchHandler),
    ('/tumblr/cron', CronHandler),
    ('/tumblr/check/(.+)/(.+)', CheckHandler),
    ('/tumblr/start', StartHandler),
]

application = webapp.WSGIApplication(_ROUTES, debug=True)
def main():
    """Run the module-level WSGI ``application`` via run_wsgi_app."""
    run_wsgi_app(application)


# Script entry point.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment