jdriscoll · December 6, 2009 13:24
diff --git a/migrate.py b/migrate.py
 import sys
 import time
 import urllib
 import urllib2

 from django.core.management import setup_environ

 # Make sure we're actually importing and activating the correct settings file here
 import settings
 setup_environ(settings)

 from meowr.models import Article
 from tagging.models import Tag


 TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

 # set this to your blog's subdomain if importing into something other than your
 # default blog:
 # BLOG_URL = 'myblog.tumblr.com'
 BLOG_URL = None

 # Your current account email and password
 USER_EMAIL = ''
 USER_PASSWORD = ''

 STATIC_URL = 'http://static.latherrinserepeat.org/images/'

 DELAY = 10

 def main():

 	count = 0
 	timeout_seconds = 1
 	
 	for a in Article.live.order_by('pub_date'):
 		
 		tag_set = Tag.objects.get_for_object(a)
 		
 		if tag_set.count() == 0:
 			tags = 'cinema'
 		else:
 			tags = ','.join([t.name for t in tag_set])
 		
 		for r in a.rating.all():
 			tags += ',%s' % r.description
 		
 		
 		body = a.body.encode('utf-8')
 		body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
 		body = body.replace('/static/images/', STATIC_URL)
 		
 		data = {}
 		data['email'] = USER_EMAIL
 		data['password'] = USER_PASSWORD
 		data['type'] = 'regular'
 		data['date'] = a.pub_date.strftime('%Y-%m-%d %H:%M:%S')
 		data['tags'] = tags.encode('utf-8')
 		data['format'] = 'markdown'
 		data['title'] = a.title.encode('utf-8')
 		data['body'] = body
 		
 		
 		
 		if BLOG_URL is not None:
 			data['group'] = BLOG_URL
 		
 		encoded_data = urllib.urlencode(data)
 		req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
 		
 		while 1:
 			try:
 				res = urllib2.urlopen(req)
 			except urllib2.URLError, e:
 				if e.code != 201:
 					print 'There was an error (code %s): %s' % (e.code, e.read())
 					timeout_seconds = timeout_seconds*2
 					print 'Retrying in %s seconds' % timeout_seconds
 					time.sleep(timeout_seconds)
 					continue
 			timeout_seconds = 1
 			count += 1
 			print 'Successfully migrated article "%s". %s completed...' % (a.title, count)
 			time.sleep(DELAY)
 			break
 			
 	print "Successfully migrated %s articles." % count

 if __name__ == '__main__':
 	main()
diff --git a/migrate2.py b/migrate2.py
 import pickle
 import sys
 import time
 import urllib
 import urllib2

 from os import path
 from django.core.management import setup_environ

 # Make sure we're actually importing and activating the correct settings file here
 import settings
 setup_environ(settings)

 from meowr.models import Article
 from tagging.models import Tag

 FILENAME = 'progress.pickle'

 TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

 # set this to your blog's subdomain if importing into something other than your
 # default blog:
 # BLOG_URL = 'myblog.tumblr.com'
 BLOG_URL = None

 # Your current account email and password
 USER_EMAIL = ''
 USER_PASSWORD = ''

 STATIC_URL = 'http://static.latherrinserepeat.org/images/'

 DELAY = 10

 IMPORT_AS_PRIVATE = 1 # 0 for false

 def main():

    count = 0
    timeout_seconds = 1
    migrate_all = False
    
    # look for history file
    if path.exists(FILENAME):
        progress = pickle.load(open(FILENAME, 'r'))
    else:
        progress = []
    
    #print "DEBUG: %s" % progress
    print "Current progress:\n%s" % '\n'.join([str(n) for n in progress])
    
    try:
        
        for a in Article.live.order_by('pub_date'):
        
            # Check to see if we've already migrated this post
            pk = a._get_pk_val()
            if pk in progress:
                print "Skipping article: %s" % a.title
                continue
            
            print "DEBUG (PK): %s" % pk
            
            if not migrate_all:
                print 'Preparing to migrating the article: "%s".' % a.title
                resp = raw_input("Continue? (Yes, No, All): ")
                if resp.lower() == 'all':
                    migrate_all = True
                elif resp.lower() != 'yes':
                    sys.exit('Exiting migration script...')          
            
            tag_set = Tag.objects.get_for_object(a)
        
            if tag_set.count() == 0:
                tags = 'cinema'
            else:
                tags = ','.join([t.name for t in tag_set])
        
            for r in a.rating.all():
                tags += ',%s' % r.description
        
        
            body = a.body.encode('utf-8')
            body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
            body = body.replace('/static/images/', STATIC_URL)
        
            data = {}
            data['email'] = USER_EMAIL
            data['password'] = USER_PASSWORD
            data['type'] = 'regular'
            data['date'] = a.pub_date.strftime('%Y-%m-%d %H:%M:%S')
            data['tags'] = tags.encode('utf-8')
            data['format'] = 'markdown'
            data['title'] = a.title.encode('utf-8')
            data['body'] = body
            data['private'] = IMPORT_AS_PRIVATE
        
            if BLOG_URL is not None:
                data['group'] = BLOG_URL
        
            encoded_data = urllib.urlencode(data)
            req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
        
            while 1:
                try:
                    res = urllib2.urlopen(req)
                except urllib2.URLError, e:
                    if e.code != 201:
                        print 'There was an error (code %s): %s' % (e.code, e.read())
                        timeout_seconds = timeout_seconds*2
                        print 'Retrying in %s seconds' % timeout_seconds
                        time.sleep(timeout_seconds)
                        continue
                timeout_seconds = 1
                count += 1
            
                # Add article primary key to progress list
                progress.append(pk)
            
                print 'Successfully migrated article "%s". %s completed...' % (a.title, count)
                time.sleep(DELAY)
                break
    
    finally:
        print "Saving progress file..."
        pickle.dump(progress, open(FILENAME, 'w'))
        
    print "Successfully migrated %s articles." % count

 if __name__ == '__main__':
    main()
diff --git a/migrate_rss.py b/migrate_rss.py
 import sys
 import time
 import urllib
 import urllib2

 from xml.etree import ElementTree as ET

 RSS_URL = 'http://myfeed.com/rss'

 TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

 # set this to your blog's subdomain if importing into something other than your
 # default blog:
 # BLOG_URL = 'myblog.tumblr.com'
 BLOG_URL = None

 # Your current account email and password
 USER_EMAIL = '[email protected]'
 USER_PASSWORD = 'secret'

 STATIC_URL = 'http://static.myserver.com/images/' # replace this

 DELAY = 10


 def main():

 	count = 0
 	timeout_seconds = 1
 	
 	req = urllib2.Request(RSS_URL)
 	res = urllib2.urlopen(req)
 	
 	xml = ET.parse(res)
 	
 	for item in xml.findall('channel/item'):
 		
 		tags = []
 		for c in item.findall('category'):
 			tags.append(c.text)
 			
 		title = item.find('title').text
 		
 		body = item.find('{http://purl.org/rss/1.0/modules/content/}encoded').text
 		#body = body.replace('http://oldserver.com/static/images/', STATIC_URL)
 		#body = body.replace('/static/images/', STATIC_URL)
 			
 		data = {}
 		data['email'] = USER_EMAIL
 		data['password'] = USER_PASSWORD
 		data['type'] = 'regular'
 		data['date'] = item.find('pubDate').text
 		data['format'] = 'html'
 		data['title'] = title.encode('utf-8')
 		data['body'] = body.encode('utf-8')
 		data['tags'] = ','.join(tags)
 		
 		if BLOG_URL is not None:
 			data['group'] = BLOG_URL
 		
 		encoded_data = urllib.urlencode(data)
 		req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
 		
 		while 1:
 			try:
 				res = urllib2.urlopen(req)
 			except urllib2.URLError, e:
 				if e.code != 201:
 					print 'There was an error (code %s): %s' % (e.code, e.read())
 					timeout_seconds = timeout_seconds*2
 					print 'Retrying in %s seconds' % timeout_seconds
 					time.sleep(timeout_seconds)
 					continue
 			timeout_seconds = 1
 			count += 1
 			print 'Successfully migrated article "%s". %s completed...' % (title, count)
 			time.sleep(DELAY)
 			break
 			
 	print "Successfully migrated %s articles." % count

 if __name__ == '__main__':
 	main()
diff --git a/migrate_tumblr.py b/migrate_tumblr.py
 import getpass
 import os
 import pickle
 import re
 import sys
 import time
 import urllib
 import urllib2


 from xml.etree import ElementTree as ET

 PROGRESS_FILENAME = 'progress.pickle'

 TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'
 DELAY = 10

 RE_TUMBLR_XML = re.compile(r'<!-- BEGIN TUMBLR XML\s+(.*)\s+END TUMBLR XML -->', re.MULTILINE|re.DOTALL)

 TYPE_MAP = {
    'regular': {'title': 'regular-title', 'body': 'regular-body'},
    'photo': {'source': 'photo-url', 'caption': 'photo-caption', 'click-through-url': 'photo-link-url'},
    'quote': {'quote': 'quote-text', 'source': 'quote-source'},
    'link': {'name': 'link-text', 'url': 'link-url', 'description': 'link-description'},
    'conversation': {'title': 'conversation-title', 'conversation': 'conversation-text'},
    'video': {'caption': 'video-caption', 'embed': 'video-player'},
    'audio': {},
 }

 def main():

    USER_EMAIL = raw_input('Enter the email address associated with your Tumblr account: ')
    USER_PASSWORD = getpass.getpass()
    BLOG_URL = raw_input('Enter a Tumblr subdomain (Ex. mysite.tumblr.com) or hit return to skip: ')

    if BLOG_URL == '':
        BLOG_URL = None

    if BLOG_URL is None:
        msg = 'Uploading data to your default blog. Continue? '
    else:
        msg = 'Uploading data to %s. Continue? ' % BLOG_URL
    if raw_input(msg).lower() not in ['y', 'yes']:
        import sys
        sys.exit()

    count = 0
    timeout_seconds = DELAY

    dir_path = os.path.dirname(os.path.abspath(__file__))

    # look for history file
    if os.path.exists(PROGRESS_FILENAME):
        progress = pickle.load(open(PROGRESS_FILENAME, 'r'))
        count = len(progress)
    else:
        progress = []

    try:
        for filename in os.listdir(dir_path):

            # Process only html files
            if filename[-4:] != 'html':
                continue

            pth = os.path.join(dir_path, filename)
            xml = ET.fromstring(RE_TUMBLR_XML.search(open(pth).read()).group(1))

            # Check to see if we've already migrated this post
            if xml.get('id') in progress:
                print "Skipping post: %s" % xml.get('id')
                continue
            else:
                print "Uploading post: %s" % xml.get('id')

            post_type = xml.get('type')
            params = TYPE_MAP[post_type]

            data = {}
            data['email'] = USER_EMAIL
            data['password'] = USER_PASSWORD
            data['type'] = post_type
            data['date'] = xml.get('date-gmt')
            data['format'] = xml.get('format')

            for key, val in params.items():
                if val == 'photo-url':
                    elements = xml.findall(val)
                    for photo_url in elements:
                        if 'media.tumblr.com' not in photo_url.text:
                            continue
                        else:
                            data[key] = photo_url.text
                            break
                else:
                    el = xml.find(val)
                    if el is not None:
                        data[key] = el.text.encode('utf-8')

            if BLOG_URL is not None:
                data['group'] = BLOG_URL

            encoded_data = urllib.urlencode(data)
            req = urllib2.Request(TUMBLR_POST_URL, encoded_data)

            while 1:
                try:
                    res = urllib2.urlopen(req)
                except urllib2.URLError, e:
                    if e.code != 201:
                        print 'There was an error (code %s): %s' % (e.code, e.read())
                        timeout_seconds = timeout_seconds*2
                        print 'Retrying in %s seconds' % timeout_seconds
                        time.sleep(timeout_seconds)
                        continue
                timeout_seconds = DELAY
                count += 1

                # Add article primary key to progress list
                progress.append(xml.get('id'))

                print 'Successfully migrated post "%s". %s completed, pausing for %s seconds...' % (xml.get('id'), count, DELAY)
                time.sleep(DELAY)
                break

    finally:
        print "Saving progress file..."
        pickle.dump(progress, open(PROGRESS_FILENAME, 'w'))

    print "Successfully migrated %s articles." % count


 if __name__ == '__main__':
    main()
	import sys
	import time
	import urllib
	import urllib2

	from django.core.management import setup_environ

	# Make sure we're actually importing and activating the correct settings file here
	import settings
	setup_environ(settings)

	from meowr.models import Article
	from tagging.models import Tag


	TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

	# set this to your blog's subdomain if importing into something other than your
	# default blog:
	# BLOG_URL = 'myblog.tumblr.com'
	BLOG_URL = None

	# Your current account email and password
	USER_EMAIL = ''
	USER_PASSWORD = ''

	STATIC_URL = 'http://static.latherrinserepeat.org/images/'

	DELAY = 10

	def main():

	count = 0
	timeout_seconds = 1

	for a in Article.live.order_by('pub_date'):

	tag_set = Tag.objects.get_for_object(a)

	if tag_set.count() == 0:
	tags = 'cinema'
	else:
	tags = ','.join([t.name for t in tag_set])

	for r in a.rating.all():
	tags += ',%s' % r.description


	body = a.body.encode('utf-8')
	body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
	body = body.replace('/static/images/', STATIC_URL)

	data = {}
	data['email'] = USER_EMAIL
	data['password'] = USER_PASSWORD
	data['type'] = 'regular'
	data['date'] = a.pub_date.strftime('%Y-%m-%d %H:%M:%S')
	data['tags'] = tags.encode('utf-8')
	data['format'] = 'markdown'
	data['title'] = a.title.encode('utf-8')
	data['body'] = body



	if BLOG_URL is not None:
	data['group'] = BLOG_URL

	encoded_data = urllib.urlencode(data)
	req = urllib2.Request(TUMBLR_POST_URL, encoded_data)

	while 1:
	try:
	res = urllib2.urlopen(req)
	except urllib2.URLError, e:
	if e.code != 201:
	print 'There was an error (code %s): %s' % (e.code, e.read())
	timeout_seconds = timeout_seconds*2
	print 'Retrying in %s seconds' % timeout_seconds
	time.sleep(timeout_seconds)
	continue
	timeout_seconds = 1
	count += 1
	print 'Successfully migrated article "%s". %s completed...' % (a.title, count)
	time.sleep(DELAY)
	break

	print "Successfully migrated %s articles." % count

	# Run the migration only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()
	import pickle
	import sys
	import time
	import urllib
	import urllib2

	from os import path
	from django.core.management import setup_environ

	# Make sure we're actually importing and activating the correct settings file here
	import settings
	setup_environ(settings)

	from meowr.models import Article
	from tagging.models import Tag

	FILENAME = 'progress.pickle'

	TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

	# set this to your blog's subdomain if importing into something other than your
	# default blog:
	# BLOG_URL = 'myblog.tumblr.com'
	BLOG_URL = None

	# Your current account email and password
	USER_EMAIL = ''
	USER_PASSWORD = ''

	STATIC_URL = 'http://static.latherrinserepeat.org/images/'

	DELAY = 10

	IMPORT_AS_PRIVATE = 1 # 0 for false

	def main():

	count = 0
	timeout_seconds = 1
	migrate_all = False

	# look for history file
	if path.exists(FILENAME):
	progress = pickle.load(open(FILENAME, 'r'))
	else:
	progress = []

	#print "DEBUG: %s" % progress
	print "Current progress:\n%s" % '\n'.join([str(n) for n in progress])

	try:

	for a in Article.live.order_by('pub_date'):

	# Check to see if we've already migrated this post
	pk = a._get_pk_val()
	if pk in progress:
	print "Skipping article: %s" % a.title
	continue

	print "DEBUG (PK): %s" % pk

	if not migrate_all:
	print 'Preparing to migrating the article: "%s".' % a.title
	resp = raw_input("Continue? (Yes, No, All): ")
	if resp.lower() == 'all':
	migrate_all = True
	elif resp.lower() != 'yes':
	sys.exit('Exiting migration script...')

	tag_set = Tag.objects.get_for_object(a)

	if tag_set.count() == 0:
	tags = 'cinema'
	else:
	tags = ','.join([t.name for t in tag_set])

	for r in a.rating.all():
	tags += ',%s' % r.description


	body = a.body.encode('utf-8')
	body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
	body = body.replace('/static/images/', STATIC_URL)

	data = {}
	data['email'] = USER_EMAIL
	data['password'] = USER_PASSWORD
	data['type'] = 'regular'
	data['date'] = a.pub_date.strftime('%Y-%m-%d %H:%M:%S')
	data['tags'] = tags.encode('utf-8')
	data['format'] = 'markdown'
	data['title'] = a.title.encode('utf-8')
	data['body'] = body
	data['private'] = IMPORT_AS_PRIVATE

	if BLOG_URL is not None:
	data['group'] = BLOG_URL

	encoded_data = urllib.urlencode(data)
	req = urllib2.Request(TUMBLR_POST_URL, encoded_data)

	while 1:
	try:
	res = urllib2.urlopen(req)
	except urllib2.URLError, e:
	if e.code != 201:
	print 'There was an error (code %s): %s' % (e.code, e.read())
	timeout_seconds = timeout_seconds*2
	print 'Retrying in %s seconds' % timeout_seconds
	time.sleep(timeout_seconds)
	continue
	timeout_seconds = 1
	count += 1

	# Add article primary key to progress list
	progress.append(pk)

	print 'Successfully migrated article "%s". %s completed...' % (a.title, count)
	time.sleep(DELAY)
	break

	finally:
	print "Saving progress file..."
	pickle.dump(progress, open(FILENAME, 'w'))

	print "Successfully migrated %s articles." % count

	# Run the migration only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()
	import getpass
	import os
	import pickle
	import re
	import sys
	import time
	import urllib
	import urllib2


	from xml.etree import ElementTree as ET

	PROGRESS_FILENAME = 'progress.pickle'

	TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'
	DELAY = 10

	# Captures the XML payload embedded between the BEGIN/END TUMBLR XML
	# markers of an HTML comment; DOTALL lets the payload span several lines.
	RE_TUMBLR_XML = re.compile(r'<!-- BEGIN TUMBLR XML\s+(.*)\s+END TUMBLR XML -->', re.MULTILINE|re.DOTALL)

	TYPE_MAP = {
	'regular': {'title': 'regular-title', 'body': 'regular-body'},
	'photo': {'source': 'photo-url', 'caption': 'photo-caption', 'click-through-url': 'photo-link-url'},
	'quote': {'quote': 'quote-text', 'source': 'quote-source'},
	'link': {'name': 'link-text', 'url': 'link-url', 'description': 'link-description'},
	'conversation': {'title': 'conversation-title', 'conversation': 'conversation-text'},
	'video': {'caption': 'video-caption', 'embed': 'video-player'},
	'audio': {},
	}

	def main():

	USER_EMAIL = raw_input('Enter the email address associated with your Tumblr account: ')
	USER_PASSWORD = getpass.getpass()
	BLOG_URL = raw_input('Enter a Tumblr subdomain (Ex. mysite.tumblr.com) or hit return to skip: ')

	if BLOG_URL == '':
	BLOG_URL = None

	if BLOG_URL is None:
	msg = 'Uploading data to your default blog. Continue? '
	else:
	msg = 'Uploading data to %s. Continue? ' % BLOG_URL
	if raw_input(msg).lower() not in ['y', 'yes']:
	import sys
	sys.exit()

	count = 0
	timeout_seconds = DELAY

	dir_path = os.path.dirname(os.path.abspath(__file__))

	# look for history file
	if os.path.exists(PROGRESS_FILENAME):
	progress = pickle.load(open(PROGRESS_FILENAME, 'r'))
	count = len(progress)
	else:
	progress = []

	try:
	for filename in os.listdir(dir_path):

	# Process only html files
	if filename[-4:] != 'html':
	continue

	pth = os.path.join(dir_path, filename)
	xml = ET.fromstring(RE_TUMBLR_XML.search(open(pth).read()).group(1))

	# Check to see if we've already migrated this post
	if xml.get('id') in progress:
	print "Skipping post: %s" % xml.get('id')
	continue
	else:
	print "Uploading post: %s" % xml.get('id')

	post_type = xml.get('type')
	params = TYPE_MAP[post_type]

	data = {}
	data['email'] = USER_EMAIL
	data['password'] = USER_PASSWORD
	data['type'] = post_type
	data['date'] = xml.get('date-gmt')
	data['format'] = xml.get('format')

	for key, val in params.items():
	if val == 'photo-url':
	elements = xml.findall(val)
	for photo_url in elements:
	if 'media.tumblr.com' not in photo_url.text:
	continue
	else:
	data[key] = photo_url.text
	break
	else:
	el = xml.find(val)
	if el is not None:
	data[key] = el.text.encode('utf-8')

	if BLOG_URL is not None:
	data['group'] = BLOG_URL

	encoded_data = urllib.urlencode(data)
	req = urllib2.Request(TUMBLR_POST_URL, encoded_data)

	while 1:
	try:
	res = urllib2.urlopen(req)
	except urllib2.URLError, e:
	if e.code != 201:
	print 'There was an error (code %s): %s' % (e.code, e.read())
	timeout_seconds = timeout_seconds*2
	print 'Retrying in %s seconds' % timeout_seconds
	time.sleep(timeout_seconds)
	continue
	timeout_seconds = DELAY
	count += 1

	# Add article primary key to progress list
	progress.append(xml.get('id'))

	print 'Successfully migrated post "%s". %s completed, pausing for %s seconds...' % (xml.get('id'), count, DELAY)
	time.sleep(DELAY)
	break

	finally:
	print "Saving progress file..."
	pickle.dump(progress, open(PROGRESS_FILENAME, 'w'))

	print "Successfully migrated %s articles." % count


	# Run the migration only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()