Skip to content

Instantly share code, notes, and snippets.

@pfleidi
Created March 31, 2012 11:57
Show Gist options
  • Select an option

  • Save pfleidi/2262842 to your computer and use it in GitHub Desktop.

Select an option

Save pfleidi/2262842 to your computer and use it in GitHub Desktop.
A slightly modified version of the stock WordPress-to-Jekyll importer.
# coding: utf-8
require 'rubygems'
require 'hpricot'
require 'fileutils'
require 'yaml'
require 'time'
module Jekyll
  # One or more http(s) URLs, each optionally preceded by whitespace and a
  # comma.
  UrlSyntax = /(((\s*,?)(https?:\/\/)(\S+))+)/
  # A Markdown link at the start of a line. Capture 1 is the link text and
  # capture 2 the URL part; trailing whitespace is consumed as well.
  MarkdownLink = /^\[(.*)\]\((#{UrlSyntax})\)[\s]*/
  # An opening HTML anchor tag at the start of a line. Capture 1 is the href
  # URL part; trailing whitespace is consumed as well.
  HTMLLink = /^<a.*href=["'](#{UrlSyntax})["'].*>[\s]*/

  # This importer takes a wordpress.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Converts every channel item of the export into a Jekyll source file.
    #
    # Each item is written to "_<post type>s/<YYYY-MM-DD>-<slug>.markdown"
    # relative to the current working directory, as a YAML header followed by
    # the item's content. A leading Markdown or HTML link found in the content
    # is removed from the body and stored in the header as "external-url".
    # A per-type summary is printed to stdout when done.
    #
    # filename - path of the WordPress export file to read.
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        type = item.at('wp:post_type').inner_text
        date = Time.parse(item.at('wp:post_date').inner_text)
        header = build_header(item, type)

        external_url, content = split_external_url(item.at('content:encoded').inner_text)
        header["external-url"] = external_url unless external_url.empty?

        slug = permalink_slug(item, header['title'])
        name = "#{date.strftime('%Y-%m-%d')}-#{slug}.markdown"

        FileUtils.mkdir_p "_#{type}s"
        File.open("_#{type}s/#{name}", "w") do |f|
          f.puts header.to_yaml
          f.puts '---'
          f.puts content
        end
        import_count[type] += 1
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end

    # Builds the YAML front-matter hash for one item. Key order is the order
    # in which the keys appear in the written file.
    def self.build_header(item, type)
      status = item.at('wp:status').inner_text
      categories = (item/:category).map(&:inner_text).reject { |c| c == 'Uncategorized' }.uniq
      metas = item.search("wp:postmeta").each_with_object({}) do |meta, collected|
        collected[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
      end

      {
        'layout' => type,
        'title' => item.at(:title).inner_text.strip,
        'categories' => categories,
        'status' => status,
        'type' => type,
        'published' => status == "publish",
        'meta' => metas
      }
    end

    # Returns the file-name slug for an item: its wp:post_name, or a
    # "prettified" title when post_name is empty (can happen).
    def self.permalink_slug(item, title)
      slug = item.at('wp:post_name').inner_text
      slug = title.downcase.split.join('-') if slug.empty?
      # A title such as "A/B Testing" would otherwise point the output path
      # into a directory that does not exist and abort the import.
      slug.gsub(%r{[/\\]+}, '-')
    end

    # Splits a line-leading link off the content. Returns [url, content]
    # with the first matching link removed, or ["", content] when neither
    # link pattern matches. MarkdownLink takes precedence over HTMLLink.
    def self.split_external_url(content)
      # The number is the capture group holding the URL in each pattern.
      [[MarkdownLink, 2], [HTMLLink, 1]].each do |pattern, url_group|
        match = pattern.match(content)
        return [match[url_group].strip, content.sub(pattern, "")] if match
      end
      ["", content]
    end

    private_class_method :build_header, :permalink_slug, :split_external_url
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment