Created
November 26, 2012 21:02
-
-
Save cageyjames/4150589 to your computer and use it in GitHub Desktop.
WordPress.xml to Markdown (Octopress)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" parse_wordpress_xml.py | |
Takes a WordPress XML export file and converts it to Octopress flavored Markdown files. | |
Author: James Fee (http://github.com/cageyjames) | |
""" | |
import io
import os
import string
import sys

import feedparser
# Characters allowed to survive in a generated file name. Built once so each
# call does a constant-time set lookup instead of rebuilding and scanning a string.
_VALID_FILENAME_CHARS = frozenset(
    "-_.() " + string.ascii_letters + string.digits
)


def sanitize_filename(filename):
    """Strip special characters from a blog post name.

    Every character that is not an ASCII letter, a digit, a space or one of
    ``-_.()`` is dropped, e.g. ``Suggestions?`` becomes ``Suggestions``.

    Args:
        filename: The raw name to clean (here: date plus post title).

    Returns:
        The name with all disallowed characters removed. The result may be
        empty if no character of the input is allowed.
    """
    return "".join(c for c in filename if c in _VALID_FILENAME_CHARS)
def _yaml_quote(text):
    """Return *text* as a double-quoted YAML scalar.

    Quoting keeps values containing ``:``, ``#`` or leading punctuation from
    breaking the front matter; backslashes and double quotes are escaped.
    """
    return u'"%s"' % text.replace(u"\\", u"\\\\").replace(u'"', u'\\"')


# Path to WordPress export xml
# open() does not expand "~" by itself, so resolve the home directory first.
export_path = os.path.expanduser(r"~\Projects\Wordpress\wordpress.2011-09-29.xml")
# Read raw bytes so feedparser can honour the encoding declared in the XML.
with open(export_path, "rb") as export_file:
    atom_xml = export_file.read()
d = feedparser.parse(atom_xml)  # the url of the Atom feed can also be used here

# Make sure the output directory exists before the first post is written.
if not os.path.isdir("./_posts"):
    os.makedirs("./_posts")

for entry in d.entries:
    # Only published posts are converted; anything else is skipped.
    if entry.get("wp_status") != "publish":
        continue

    # Zero-pad month and day so file names follow YYYY-MM-DD-title and sort
    # chronologically.
    date = "%04d-%02d-%02d" % tuple(entry.date_parsed[:3])
    title = entry.title.replace(" ", "-")
    filename = "./_posts/" + sanitize_filename("%s-%s" % (date, title)) + ".markdown"
    print(filename)

    # Collect category/tag terms once each, in feed order. Terms are taken
    # regardless of their scheme; only the "Uncategorized" placeholder is dropped.
    tag_list = []
    for tag in entry.get("tags", []):
        term = tag["term"]
        if term != "Uncategorized" and term not in tag_list:
            tag_list.append(term)

    # Entries without a content element are written with an empty body.
    content_str = entry.content[0].value if entry.get("content") else u""

    # Write UTF-8 so non-ASCII characters survive instead of becoming "?";
    # the with-block closes the file even if a write fails.
    with io.open(filename, "w", encoding="utf-8") as of:
        of.write(
            u"---\n"
            u"layout: blog\n"
            u"title: %s\n"
            u"post_author: %s\n"
            u"categories:\n"
            % (_yaml_quote(entry.title), _yaml_quote(entry.get("author", u"")))
        )
        for atag in tag_list:
            of.write(u"- %s\n" % atag)
        of.write(u"---\n\n")
        of.write(content_str)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment