Created
May 14, 2012 01:43
-
-
Save major/2691197 to your computer and use it in GitHub Desktop.
Exporting rackerhacker.com from WordPress to Markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import codecs
import os
from pprint import pprint
import sys
import time

import feedparser
import html2text
def slugmaker(linktext):
    """Build a post's slug from its full link.

    The link is split on '/' and pieces 3 through 6 are joined back
    together behind a leading slash, so a link such as
    ``http://host/2012/05/13/post-name/`` becomes
    ``/2012/05/13/post-name``.
    """
    segments = linktext.split('/')[3:7]
    return '/' + '/'.join(segments)
# Forget line wrapping
# NOTE(review): newer html2text releases may read the wrap width from a
# different place than this module attribute -- confirm against the
# installed version.
html2text.BODY_WIDTH = 0

# The WordPress XML export to read and the directory the markdown goes in
EXPORT_FILE = 'rackerhacker.wordpress.2012-05-13.xml'
OUTPUT_DIR = 'src'

# Basic markdown template: a metadata header, a blank line, then the body.
# The blank line matters: Markdown meta-data parsers stop reading the header
# at the first blank line, so a body that happens to start with "Word: ..."
# is not mistaken for another metadata field.
POST_TEMPLATE = """Title: %(title)s
Date: %(date)s
Slug: %(slug)s
Tags: %(tags)s
Author: %(author)s

%(content)s
"""

# Make sure there is somewhere to write to before the first post is saved
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Pick up the XML export
d = feedparser.parse(EXPORT_FILE)

for entry in d.entries:

    # Skip if it's an unpublished entry
    if entry.wp_status != 'publish':
        continue

    # Skip if it's not a post
    if entry.wp_post_type != 'post':
        continue

    # Name the file after the last non-empty piece of the link. For the
    # usual .../YYYY/MM/DD/name/ links this is the same piece a fixed index
    # of 6 would pick, but shorter links no longer raise IndexError.
    link_pieces = [piece for piece in entry.link.split('/') if piece]
    if not link_pieces:
        sys.stderr.write("Skipping %r: no usable link\n" % entry.title)
        continue

    # Empty dict to hold this post's data
    elements = {}

    # The easy stuff
    elements['title'] = entry.title
    elements['slug'] = slugmaker(entry.link)
    elements['date'] = time.strftime('%Y-%m-%d %H:%M', entry.published_parsed)
    elements['author'] = "Major Hayden"

    # Get the tags (if they exist); only 'post_tag' entries are kept
    taglist = [tag.term
               for tag in getattr(entry, 'tags', [])
               if tag.scheme == 'post_tag']
    elements['tags'] = ', '.join(taglist)

    # Snag the actual content of the post
    elements['content'] = html2text.html2text(entry.content[0].value)

    # Generate a filename to hold the markdown text
    elements['filename'] = "%s-%s.md" % (
        time.strftime('%Y-%m-%d', entry.published_parsed),
        link_pieces[-1])

    file_contents = POST_TEMPLATE % elements

    # Write the data (I hate unicode)
    output_path = os.path.join(OUTPUT_DIR, elements['filename'])
    with codecs.open(output_path, 'wb', "utf-8") as f:
        f.write(file_contents)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment