Created
December 5, 2012 20:12
-
-
Save neuromusic/4219079 to your computer and use it in GitHub Desktop.
script which migrates a blog from Drupal 6 to WordPress 3.4 using sqlalchemy & wordpress_xmlrpc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import json | |
import datetime | |
from sqlalchemy import create_engine | |
from sqlalchemy.ext.declarative import declarative_base | |
from sqlalchemy.orm import sessionmaker | |
from wordpress_xmlrpc import Client, WordPressPost, WordPressComment, WordPressTerm | |
from wordpress_xmlrpc.methods import media, posts, users, comments, taxonomies | |
from wordpress_xmlrpc.compat import xmlrpc_client | |
# works with... | |
# - wordpress_xmlrpc v2.2+ (comment timestamps fail < 2.2) | |
# - sqlalchemy 0.7.8 | |
# Drupal 6 database, reached through sqlalchemy (MySQL over a unix socket).
_DRUPAL_DB_URL = (
    'mysql+mysqldb://root:root@localhost/drupal_archive'
    '?unix_socket=/Applications/MAMP/tmp/mysql/mysql.sock&charset=utf8'
)
_engine = create_engine(_DRUPAL_DB_URL, echo=False)
# The declarative base is bound to the engine so that the autoloaded table
# classes can reflect their columns from the database.
_Base = declarative_base(bind=_engine)

# WordPress installation, reached through wordpress_xmlrpc.
_WP_XMLRPC_URL = 'http://localhost/wordpress/xmlrpc.php'
_client = Client(_WP_XMLRPC_URL, 'admin', 'password')
# Author list is fetched once at import time and searched by display name.
_wp_authors = _client.call(users.GetAuthors())
# classes for building the sqlalchemy ORM from tables | |
class Nodes(_Base):
    """Reflected mapping of the drupal ``node`` table.

    Columns are autoloaded from the database. The migration reads the
    rows whose ``type`` is 'blog' and whose ``status`` is above zero.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'node'
class NodeRevisions(_Base):
    """Reflected mapping of the drupal ``node_revisions`` table.

    Columns are autoloaded from the database. The migration looks a
    revision up by a node's ``vid`` to read its title, body and teaser.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'node_revisions'
class Comments(_Base):
    """Reflected mapping of the drupal ``comments`` table.

    Columns are autoloaded from the database. Rows are selected by the
    ``nid`` of the node they were posted on.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'comments'
class FieldBlogImage(_Base):
    """Reflected mapping of the CCK blog image field table.

    Columns are autoloaded from ``content_field_blog_image``. Each row
    ties a node (``nid``) to a file id plus a title, alt text and delta.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'content_field_blog_image'
class Files(_Base):
    """Reflected mapping of the drupal ``files`` table.

    Columns are autoloaded from the database. The migration reads the
    ``filepath`` and ``filemime`` of the image files.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'files'
class Terms(_Base):
    """Reflected mapping of the drupal ``term_data`` table.

    Columns are autoloaded from the database. The ``name`` of each term
    (tags & categories) is what gets migrated.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'term_data'
class TermNodes(_Base):
    """Reflected mapping of the drupal ``term_node`` table.

    Columns are autoloaded from the database. Each row pairs a node id
    (``nid``) with the id of a term (``tid``) attached to that node.
    """
    __table_args__ = dict(autoload=True)
    __tablename__ = 'term_node'
# functions for the migration | |
def grab_drupal_blog(session):
    """ collect the published drupal blog posts into a nested dictionary
    parameter:
    - session: sqlalchemy session used for every query
    returns:
    - dict keyed by node id; every value is a dict holding 'author'
      (only set when the node's uid is 1 or 2), 'created', 'changed',
      'title', 'content', 'excerpt', 'timestamp', 'images' (keyed by
      delta), 'categories' (list of term names) and 'comments' (keyed
      by comment id)
    """
    to_datetime = datetime.datetime.fromtimestamp
    # drupal uid -> author name used on the wordpress side
    author_by_uid = {1: 'Hodgkin', 2: 'Huxley'}

    published = session.query(Nodes).filter(Nodes.status > 0, Nodes.type == 'blog')
    result = {}
    for node in published:
        entry = {}
        if node.uid in author_by_uid:
            entry['author'] = author_by_uid[node.uid]
        entry['created'] = to_datetime(node.created)
        entry['changed'] = to_datetime(node.changed)

        # text comes from the revision the node currently points at
        revision = session.query(NodeRevisions).get(node.vid)
        entry['title'] = revision.title
        entry['content'] = revision.body
        entry['excerpt'] = revision.teaser
        entry['timestamp'] = to_datetime(revision.timestamp)

        # images attached through the CCK field, keyed by their delta
        entry['images'] = {}
        image_rows = (
            session.query(FieldBlogImage)
            .join(Nodes, Nodes.nid == FieldBlogImage.nid)
            .filter(FieldBlogImage.nid == node.nid)
        )
        for row in image_rows:
            details = {
                'title': row.field_blog_image_title,
                'alt': row.field_blog_image_alt,
                'delta': row.delta,
            }
            stored = session.query(Files).get(row.field_blog_image_fid)
            # only the base name is kept; the directory part is dropped
            details['filename'] = stored.filepath.split('/')[-1]
            details['mimetype'] = stored.filemime
            entry['images'][row.delta] = details

        # names of all terms attached to the node
        term_links = (
            session.query(TermNodes)
            .join(Nodes, Nodes.nid == TermNodes.nid)
            .filter(TermNodes.nid == node.nid)
        )
        entry['categories'] = [
            session.query(Terms).get(link.tid).name for link in term_links
        ]

        # comments posted on the node, keyed by comment id
        entry['comments'] = {}
        comment_rows = (
            session.query(Comments)
            .join(Nodes, Nodes.nid == Comments.nid)
            .filter(Comments.nid == node.nid)
        )
        for row in comment_rows:
            entry['comments'][row.cid] = {
                'subject': row.subject,
                'content': row.comment,
                'author_IP': row.hostname,
                'author': row.name,
                'author_email': row.mail,
                'author_url': row.homepage,
                'date': to_datetime(row.timestamp),
            }

        result[node.nid] = entry
    return result
def loadSession():
    """ connect to drupal database
    returns:
    - sqlalchemy session handler bound to the module-level engine
    """
    Session = sessionmaker(bind=_engine)
    return Session()
def author_id_from_name(name):
    """ look up the wp author id that belongs to a display name
    parameter:
    - name: string of author display name (compared case-insensitively)
    returns:
    - id of the first matching author, or 0 when nobody matches
    """
    matching_ids = (
        candidate.id
        for candidate in _wp_authors
        if name.lower() == candidate.display_name.lower()
    )
    return next(matching_ids, 0)
def upload_image(image):
    """ send one image file to wordpress
    (assumes images have all been copied to ./images/)
    parameter:
    - image: dict with the keys 'filename' and 'mimetype'
    returns:
    - response of the media.UploadFile call
    """
    payload = {'name': image['filename'], 'type': image['mimetype']}
    local_path = "./images/" + payload['name']
    print("uploading image... %s" % (payload['name']))
    with open(local_path, 'rb') as handle:
        # raw bytes are wrapped so they can travel over xml-rpc
        payload['bits'] = xmlrpc_client.Binary(handle.read())
    return _client.call(media.UploadFile(payload))
def upload_comment(comment, post_id):
    """ adds a migrated comment to a wordpress post
    parameter:
    - comment: dict with the keys 'subject', 'content', 'date', 'author',
      'author_IP', 'author_email' and 'author_url'
    - post_id: id of the wordpress post the comment belongs to
    returns:
    - result of the comments.EditComment call
    """
    new_comment = WordPressComment()
    # the drupal subject has no field here, so it is put in bold above the body
    new_comment.content = '<b>%s</b>\n%s' % (comment['subject'], comment['content'])
    new_comment.date_created = comment['date']
    comment_id = _client.call(comments.NewComment(post_id, new_comment))

    # The author details are only filled in after the comment exists and are
    # pushed with a second call.
    # NOTE(review): presumably NewComment attributes the comment to the
    # logged-in xml-rpc user, so the original author has to be applied by an
    # edit afterwards -- confirm against the wordpress_xmlrpc docs.
    new_comment.author_ip = comment['author_IP']
    new_comment.author_url = comment['author_url']
    new_comment.author_email = comment['author_email']
    new_comment.author = comment['author']
    print("adding comment... %s,%s" % (comment['date'], comment['author']))
    return _client.call(comments.EditComment(comment_id, new_comment))
def upload_post(post):
    """ adds post (and its images) to wordpress
    parameter:
    - post: dict with the keys 'title', 'content', 'excerpt', 'created',
      'author', 'categories' and 'images' (dict of image dicts keyed by
      delta)
    returns:
    - result of the posts.NewPost call (used by the caller as the post id)
    """
    # NOTE(review): css class, width and height are hard-coded for every image
    image_template = (
        '<a href="%s"><img class="alignnone size-large wp-image-616" '
        'title="%s" src="%s" alt="%s" width="1024" height="682" /></a>'
    )

    # upload all of the images, in delta order
    image_html = []
    for index in sorted(post['images']):
        image = post['images'][index]
        response = upload_image(image)
        # use the image's own alt text; fall back to the title when it is empty
        alt_text = image.get('alt') or image['title']
        image_html.append(
            image_template % (response['url'], image['title'], response['url'], alt_text)
        )

    # add post to wordpress
    new_post = WordPressPost()
    new_post.title = post['title']
    # images are appended to the end of the post body
    new_post.content = '\n'.join([post['content']] + image_html)
    new_post.excerpt = post['excerpt']
    new_post.date = post['created']
    new_post.post_status = 'publish'
    new_post.comment_status = True
    new_post.ping_status = True
    # every drupal term is submitted as a wordpress tag
    new_post.terms_names = {
        'post_tag': post['categories'],
    }
    new_post.user = author_id_from_name(post['author'])
    print("submitting post... %s: %s" % (new_post.date, new_post.title))
    return _client.call(posts.NewPost(new_post))
if __name__ == "__main__":
    # read the whole blog out of the drupal 6 database
    blog = grab_drupal_blog(loadSession())

    def dthandler(obj):
        """json fallback: datetimes become ISO strings, anything else null"""
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return None

    # keep a json copy of everything that was read
    with open('export_blog.json', 'wb') as dump_file:
        json.dump(blog, dump_file, indent=4, sort_keys=True, default=dthandler)

    # push every post to wordpress, followed by that post's comments
    for post in blog.itervalues():
        post_id = upload_post(post)
        for comment_info in post['comments'].itervalues():
            upload_comment(comment_info, post_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment