Skip to content

Instantly share code, notes, and snippets.

@cageyjames
Created November 26, 2012 21:02
Show Gist options
  • Save cageyjames/4150589 to your computer and use it in GitHub Desktop.
Save cageyjames/4150589 to your computer and use it in GitHub Desktop.
WordPress.xml to Markdown (Octopress)
""" parse_wordpress_xml.py
Takes a WordPress XML export file and converts each published post into an Octopress-flavored Markdown file.
Author: James Fee (http://github.com/cageyjames)
"""
import io
import os
import string
import sys

import feedparser
# Characters allowed to survive in a generated post filename. Built once at
# import time as a frozenset so each membership test is a hash lookup instead
# of a linear scan of a string that was rebuilt on every call.
_VALID_FILENAME_CHARS = frozenset(
    "-_.() " + string.ascii_letters + string.digits
)


def sanitize_filename(filename):
    """Return *filename* with every non-whitelisted character removed.

    The whitelist is ASCII letters, digits, the space character and
    ``-_.()``. Any other character -- e.g. the ``?`` in
    ``<title>Suggestions?</title>`` -- is dropped, not replaced, so the
    result may be shorter than the input (or empty).
    """
    return "".join(c for c in filename if c in _VALID_FILENAME_CHARS)

# Path to WordPress export xml. open() does not expand "~" on its own, so the
# home-directory prefix is resolved explicitly before the file is opened.
atom_xml = os.path.expanduser(r"~\Projects\Wordpress\wordpress.2011-09-29.xml")

# Read the export as raw bytes and hand them to feedparser unchanged; the
# context manager closes the handle even if reading fails.
with open(atom_xml, "rb") as xml_file:
    atom_xml = xml_file.read()

d = feedparser.parse(atom_xml)  # the url of the Atom feed can also be used here

# Output directory for the generated posts. It is created up front so the
# first open() below does not fail when the directory is missing.
posts_dir = "./_posts/"
if not os.path.isdir(posts_dir):
    os.makedirs(posts_dir)

for entry in d.entries:
    # Convert published posts only. Entries that carry no wp_status at all
    # (e.g. when a plain Atom feed is parsed instead of a WordPress export)
    # are treated as published rather than raising KeyError.
    if entry.get("wp_status", "publish") != "publish":
        continue

    # Zero-padded YYYY-MM-DD prefix, the filename date format documented for
    # Jekyll/Octopress posts; it also makes the files sort chronologically.
    date = "%04d-%02d-%02d" % tuple(entry.date_parsed[:3])
    title = entry.title.replace(" ", "-")
    filename = (
        posts_dir + sanitize_filename("%s-%s" % (date, title)) + ".markdown"
    )
    print(filename)

    # Collect tag terms in first-seen order, without duplicates and without
    # the "Uncategorized" term.
    # NOTE: every entry in entry.tags is used; the original carried a
    # disabled filter on tag["scheme"] == "tag", which stays disabled here.
    tag_list = []
    if "tags" in entry:
        for tag in entry.tags:
            term = tag["term"]
            if term != "Uncategorized" and term not in tag_list:
                tag_list.append(term)

    # YAML front matter expected by Octopress; the category list items and
    # the closing "---" are appended below.
    header = (
        u"---\n"
        u"layout: blog\n"
        u"title: %s\n"
        u"post_author: %s\n"
        u"categories:\n"
    ) % (entry.title, entry.author)

    # Write text as UTF-8 instead of forcing ASCII: the previous
    # encode("ascii", "replace") turned every non-ASCII character into "?",
    # and tag terms were written unencoded, which raised UnicodeEncodeError
    # on a non-ASCII tag. io.open behaves the same on Python 2 and 3.
    with io.open(filename, "w", encoding="utf-8") as post_file:
        post_file.write(header)
        post_file.writelines(u"- %s\n" % term for term in tag_list)
        post_file.write(u"---\n\n")
        post_file.write(entry.content[0].value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment