Created
December 6, 2009 13:24
-
-
Save jdriscoll/250223 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import time | |
import urllib | |
import urllib2 | |
from django.core.management import setup_environ | |
# Make sure we're actually importing and activating the correct settings file here
import settings | |
setup_environ(settings) | |
from meowr.models import Article | |
from tagging.models import Tag | |
# Endpoint that every migrated post is submitted to.
TUMBLR_POST_URL = "http://www.tumblr.com/api/write"

# Leave as None to post to your default blog. To import into a different
# blog, set this to that blog's subdomain, for example:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None

# Email address and password of the Tumblr account used for posting.
USER_EMAIL = ""
USER_PASSWORD = ""

# Base URL substituted for the old static image links in article bodies.
STATIC_URL = "http://static.latherrinserepeat.org/images/"

# Seconds to pause after each successfully migrated article.
DELAY = 10
def _post_until_accepted(request):
    """Send *request* with urllib2, retrying until the post is accepted.

    The wait doubles after every failed attempt (the first retry waits two
    seconds) and there is no retry limit, so a permanently failing request
    keeps retrying until the script is interrupted.
    """
    backoff_seconds = 1
    while True:
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            # HTTPError must be caught before URLError (it is a subclass).
            # A 201 surfaced as an error still means the post was created,
            # so it is treated as success rather than retried.
            if e.code == 201:
                return
            print('There was an error (code %s): %s' % (e.code, e.read()))
        except urllib2.URLError as e:
            # Connection-level failures carry a reason but no HTTP status or
            # body; touching e.code / e.read() here raises AttributeError.
            print('There was an error: %s' % (e.reason,))
        else:
            response.close()
            return
        backoff_seconds = backoff_seconds * 2
        print('Retrying in %s seconds' % backoff_seconds)
        time.sleep(backoff_seconds)


def main():
    """Copy every live Article to Tumblr as a markdown 'regular' post.

    Articles are processed in pub_date order. Each post is tagged with the
    article's tags (or 'cinema' when it has none) followed by the
    description of each of its ratings, and links into the old static image
    directory are rewritten to STATIC_URL. The script sleeps DELAY seconds
    after every accepted post and prints a running count.
    """
    count = 0
    for a in Article.live.order_by('pub_date'):
        # Evaluate the tag queryset once instead of count() plus iteration.
        tag_names = [t.name for t in Tag.objects.get_for_object(a)]
        if tag_names:
            tags = ','.join(tag_names)
        else:
            tags = 'cinema'
        for r in a.rating.all():
            tags += ',%s' % r.description

        # Replace the absolute form first so the relative pattern cannot
        # match inside an already rewritten absolute URL's old prefix.
        body = a.body.encode('utf-8')
        body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
        body = body.replace('/static/images/', STATIC_URL)

        data = {
            'email': USER_EMAIL,
            'password': USER_PASSWORD,
            'type': 'regular',
            'date': a.pub_date.strftime('%Y-%m-%d %H:%M:%S'),
            'tags': tags.encode('utf-8'),
            'format': 'markdown',
            'title': a.title.encode('utf-8'),
            'body': body,
        }
        if BLOG_URL is not None:
            data['group'] = BLOG_URL

        req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))
        _post_until_accepted(req)

        count += 1
        print('Successfully migrated article "%s". %s completed...' % (a.title, count))
        time.sleep(DELAY)
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import sys | |
import time | |
import urllib | |
import urllib2 | |
from os import path | |
from django.core.management import setup_environ | |
# Make sure we're actually importing and activating the correct settings file here
import settings | |
setup_environ(settings) | |
from meowr.models import Article | |
from tagging.models import Tag | |
# Pickle file that records the primary keys of already migrated articles.
FILENAME = "progress.pickle"

# Endpoint that every migrated post is submitted to.
TUMBLR_POST_URL = "http://www.tumblr.com/api/write"

# Leave as None to post to your default blog. To import into a different
# blog, set this to that blog's subdomain, for example:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None

# Email address and password of the Tumblr account used for posting.
USER_EMAIL = ""
USER_PASSWORD = ""

# Base URL substituted for the old static image links in article bodies.
STATIC_URL = "http://static.latherrinserepeat.org/images/"

# Seconds to pause after each successfully migrated article.
DELAY = 10

# Sent as the 'private' field of every post: 1 for true, 0 for false.
IMPORT_AS_PRIVATE = 1
def _post_until_accepted(request):
    """Send *request* with urllib2, retrying until the post is accepted.

    The wait doubles after every failed attempt (the first retry waits two
    seconds) and there is no retry limit, so a permanently failing request
    keeps retrying until the script is interrupted.
    """
    backoff_seconds = 1
    while True:
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            # HTTPError must be caught before URLError (it is a subclass).
            # A 201 surfaced as an error still means the post was created,
            # so it is treated as success rather than retried.
            if e.code == 201:
                return
            print('There was an error (code %s): %s' % (e.code, e.read()))
        except urllib2.URLError as e:
            # Connection-level failures carry a reason but no HTTP status or
            # body; touching e.code / e.read() here raises AttributeError.
            print('There was an error: %s' % (e.reason,))
        else:
            response.close()
            return
        backoff_seconds = backoff_seconds * 2
        print('Retrying in %s seconds' % backoff_seconds)
        time.sleep(backoff_seconds)


def main():
    """Interactively copy live Articles to Tumblr, resuming earlier runs.

    Primary keys of migrated articles are kept in the pickle file FILENAME;
    articles whose key is already listed are skipped. Before each article
    the user is asked to confirm ('yes'), migrate everything without further
    prompts ('all'), or stop (anything else). The progress list is written
    back in a ``finally`` clause, so it is saved even when the script exits
    early or fails part-way through.
    """
    count = 0
    migrate_all = False

    # look for history file
    if path.exists(FILENAME):
        # NOTE: unpickling can execute arbitrary code; only load a progress
        # file that this script wrote itself.
        with open(FILENAME, 'r') as progress_file:
            progress = pickle.load(progress_file)
    else:
        progress = []
    print("Current progress:\n%s" % '\n'.join([str(n) for n in progress]))

    try:
        for a in Article.live.order_by('pub_date'):
            # Check to see if we've already migrated this post
            pk = a._get_pk_val()
            if pk in progress:
                print("Skipping article: %s" % a.title)
                continue
            print("DEBUG (PK): %s" % pk)

            if not migrate_all:
                print('Preparing to migrating the article: "%s".' % a.title)
                resp = raw_input("Continue? (Yes, No, All): ")
                if resp.lower() == 'all':
                    migrate_all = True
                elif resp.lower() != 'yes':
                    # SystemExit still passes through the finally clause
                    # below, so progress made so far is saved.
                    sys.exit('Exiting migration script...')

            # Evaluate the tag queryset once instead of count() + iteration.
            tag_names = [t.name for t in Tag.objects.get_for_object(a)]
            if tag_names:
                tags = ','.join(tag_names)
            else:
                tags = 'cinema'
            for r in a.rating.all():
                tags += ',%s' % r.description

            body = a.body.encode('utf-8')
            body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
            body = body.replace('/static/images/', STATIC_URL)

            data = {
                'email': USER_EMAIL,
                'password': USER_PASSWORD,
                'type': 'regular',
                'date': a.pub_date.strftime('%Y-%m-%d %H:%M:%S'),
                'tags': tags.encode('utf-8'),
                'format': 'markdown',
                'title': a.title.encode('utf-8'),
                'body': body,
                'private': IMPORT_AS_PRIVATE,
            }
            if BLOG_URL is not None:
                data['group'] = BLOG_URL

            req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))
            _post_until_accepted(req)

            count += 1
            # Add article primary key to progress list
            progress.append(pk)
            print('Successfully migrated article "%s". %s completed...' % (a.title, count))
            time.sleep(DELAY)
    finally:
        print("Saving progress file...")
        with open(FILENAME, 'w') as progress_file:
            pickle.dump(progress, progress_file)
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import time | |
import urllib | |
import urllib2 | |
from xml.etree import ElementTree as ET | |
# Feed whose items are imported; replace with the address of your own feed.
RSS_URL = "http://myfeed.com/rss"

# Endpoint that every migrated post is submitted to.
TUMBLR_POST_URL = "http://www.tumblr.com/api/write"

# Leave as None to post to your default blog. To import into a different
# blog, set this to that blog's subdomain, for example:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None

# Email address and password of the Tumblr account used for posting
# (placeholder values; replace them with your own).
USER_EMAIL = "[email protected]"
USER_PASSWORD = "secret"

# Base URL for rewritten image links; replace this with your own server.
STATIC_URL = "http://static.myserver.com/images/"

# Seconds to pause after each successfully migrated article.
DELAY = 10
def _post_until_accepted(request):
    """Send *request* with urllib2, retrying until the post is accepted.

    The wait doubles after every failed attempt (the first retry waits two
    seconds) and there is no retry limit, so a permanently failing request
    keeps retrying until the script is interrupted.
    """
    backoff_seconds = 1
    while True:
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            # HTTPError must be caught before URLError (it is a subclass).
            # A 201 surfaced as an error still means the post was created,
            # so it is treated as success rather than retried.
            if e.code == 201:
                return
            print('There was an error (code %s): %s' % (e.code, e.read()))
        except urllib2.URLError as e:
            # Connection-level failures carry a reason but no HTTP status or
            # body; touching e.code / e.read() here raises AttributeError.
            print('There was an error: %s' % (e.reason,))
        else:
            response.close()
            return
        backoff_seconds = backoff_seconds * 2
        print('Retrying in %s seconds' % backoff_seconds)
        time.sleep(backoff_seconds)


def main():
    """Import every item of the RSS feed at RSS_URL into Tumblr.

    Each ``channel/item`` becomes an HTML 'regular' post whose title comes
    from ``<title>``, whose body comes from the ``content:encoded`` element,
    whose tags come from the ``<category>`` elements and whose date comes
    from ``<pubDate>``. Items with neither a title nor a body are skipped.
    The script sleeps DELAY seconds after every accepted post.
    """
    count = 0

    feed = urllib2.urlopen(urllib2.Request(RSS_URL))
    try:
        xml = ET.parse(feed)
    finally:
        feed.close()

    for item in xml.findall('channel/item'):
        # Empty <category/> elements have text None and would break join().
        tags = [c.text for c in item.findall('category') if c.text]
        # findtext() returns None for a missing element and '' for an empty
        # one, so normalise both cases to ''.
        title = item.findtext('title') or ''
        body = item.findtext('{http://purl.org/rss/1.0/modules/content/}encoded') or ''
        if not title and not body:
            print('Skipping feed item without title or content')
            continue
        # Optional image-link rewrite; enable and adapt if needed:
        #body = body.replace('http://oldserver.com/static/images/', STATIC_URL)
        #body = body.replace('/static/images/', STATIC_URL)

        data = {
            'email': USER_EMAIL,
            'password': USER_PASSWORD,
            'type': 'regular',
            'format': 'html',
            'title': title.encode('utf-8'),
            'body': body.encode('utf-8'),
            # Encode like title/body: urlencode cannot handle non-ASCII
            # unicode values.
            'tags': ','.join(tags).encode('utf-8'),
        }
        pub_date = item.findtext('pubDate')
        if pub_date:
            data['date'] = pub_date
        if BLOG_URL is not None:
            data['group'] = BLOG_URL

        req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))
        _post_until_accepted(req)

        count += 1
        print('Successfully migrated article "%s". %s completed...' % (title, count))
        time.sleep(DELAY)
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import getpass | |
import os | |
import pickle | |
import re | |
import sys | |
import time | |
import urllib | |
import urllib2 | |
from xml.etree import ElementTree as ET | |
# Pickle file that records the ids of posts that were already uploaded.
PROGRESS_FILENAME = "progress.pickle"

# Endpoint that every migrated post is submitted to.
TUMBLR_POST_URL = "http://www.tumblr.com/api/write"

# Seconds to pause after each upload; also the starting retry back-off.
DELAY = 10

# Captures the XML document embedded in an HTML comment of each post file.
RE_TUMBLR_XML = re.compile(
    r'<!-- BEGIN TUMBLR XML\s+(.*)\s+END TUMBLR XML -->',
    re.DOTALL | re.MULTILINE,
)

# For every post type: request field name -> name of the child element of
# the embedded XML that holds the value for that field.
TYPE_MAP = {
    "regular": {
        "title": "regular-title",
        "body": "regular-body",
    },
    "photo": {
        "source": "photo-url",
        "caption": "photo-caption",
        "click-through-url": "photo-link-url",
    },
    "quote": {
        "quote": "quote-text",
        "source": "quote-source",
    },
    "link": {
        "name": "link-text",
        "url": "link-url",
        "description": "link-description",
    },
    "conversation": {
        "title": "conversation-title",
        "conversation": "conversation-text",
    },
    "video": {
        "caption": "video-caption",
        "embed": "video-player",
    },
    "audio": {},
}
def _post_until_accepted(request, initial_backoff):
    """Send *request* with urllib2, retrying until the post is accepted.

    The wait starts from *initial_backoff* seconds and doubles before every
    retry. There is no retry limit, so a permanently failing request keeps
    retrying until the script is interrupted.
    """
    backoff_seconds = initial_backoff
    while True:
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            # HTTPError must be caught before URLError (it is a subclass).
            # A 201 surfaced as an error still means the post was created,
            # so it is treated as success rather than retried.
            if e.code == 201:
                return
            print('There was an error (code %s): %s' % (e.code, e.read()))
        except urllib2.URLError as e:
            # Connection-level failures carry a reason but no HTTP status or
            # body; touching e.code / e.read() here raises AttributeError.
            print('There was an error: %s' % (e.reason,))
        else:
            response.close()
            return
        backoff_seconds = backoff_seconds * 2
        print('Retrying in %s seconds' % backoff_seconds)
        time.sleep(backoff_seconds)


def _extract_fields(post, params):
    """Return the request fields for the XML element *post*.

    *params* is one TYPE_MAP entry: it maps request field names to the names
    of the child elements of *post* that hold their values. Missing or empty
    child elements are left out of the result.
    """
    fields = {}
    for key, val in params.items():
        if val == 'photo-url':
            # Several photo-url elements may exist; use the first one whose
            # text mentions media.tumblr.com.
            for photo_url in post.findall(val):
                if photo_url.text and 'media.tumblr.com' in photo_url.text:
                    fields[key] = photo_url.text.encode('utf-8')
                    break
        else:
            el = post.find(val)
            # An empty element has text None, which cannot be encoded.
            if el is not None and el.text is not None:
                fields[key] = el.text.encode('utf-8')
    return fields


def main():
    """Upload the posts stored as HTML files next to this script to Tumblr.

    Prompts for the account e-mail, password and an optional target blog,
    then reads every file in the script's directory whose name ends in
    'html', extracts the XML embedded between the BEGIN/END TUMBLR XML
    markers and posts its fields. Ids of uploaded posts are kept in the
    pickle file PROGRESS_FILENAME so that a later run skips them; the list
    is written back in a ``finally`` clause even when the run fails.
    Files without embedded XML and posts of a type missing from TYPE_MAP
    are reported and skipped.
    """
    user_email = raw_input('Enter the email address associated with your Tumblr account: ')
    user_password = getpass.getpass()
    blog_url = raw_input('Enter a Tumblr subdomain (Ex. mysite.tumblr.com) or hit return to skip: ')
    if blog_url == '':
        blog_url = None

    if blog_url is None:
        msg = 'Uploading data to your default blog. Continue? '
    else:
        msg = 'Uploading data to %s. Continue? ' % blog_url
    if raw_input(msg).lower() not in ['y', 'yes']:
        sys.exit()

    count = 0
    dir_path = os.path.dirname(os.path.abspath(__file__))

    # look for history file
    if os.path.exists(PROGRESS_FILENAME):
        # NOTE: unpickling can execute arbitrary code; only load a progress
        # file that this script wrote itself.
        with open(PROGRESS_FILENAME, 'r') as progress_file:
            progress = pickle.load(progress_file)
        count = len(progress)
    else:
        progress = []

    try:
        for filename in os.listdir(dir_path):
            # Process only html files
            if not filename.endswith('html'):
                continue
            pth = os.path.join(dir_path, filename)
            with open(pth) as post_file:
                match = RE_TUMBLR_XML.search(post_file.read())
            if match is None:
                print("Skipping file without embedded Tumblr XML: %s" % filename)
                continue
            xml = ET.fromstring(match.group(1))
            post_id = xml.get('id')

            # Check to see if we've already migrated this post
            if post_id in progress:
                print("Skipping post: %s" % post_id)
                continue

            post_type = xml.get('type')
            params = TYPE_MAP.get(post_type)
            if params is None:
                print("Skipping post %s: unsupported type %s" % (post_id, post_type))
                continue
            print("Uploading post: %s" % post_id)

            data = {
                'email': user_email,
                'password': user_password,
                'type': post_type,
            }
            # A missing attribute would otherwise be sent as the text 'None'.
            for field, attribute in (('date', 'date-gmt'), ('format', 'format')):
                value = xml.get(attribute)
                if value is not None:
                    data[field] = value
            data.update(_extract_fields(xml, params))
            if blog_url is not None:
                data['group'] = blog_url

            req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))
            _post_until_accepted(req, DELAY)

            count += 1
            # Add post id to progress list
            progress.append(post_id)
            print('Successfully migrated post "%s". %s completed, pausing for %s seconds...' % (post_id, count, DELAY))
            time.sleep(DELAY)
    finally:
        print("Saving progress file...")
        with open(PROGRESS_FILENAME, 'w') as progress_file:
            pickle.dump(progress, progress_file)
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment