Skip to content

Instantly share code, notes, and snippets.

@PotHix
Last active August 29, 2015 14:21
Show Gist options
  • Save PotHix/e78c75281d4f1c508e89 to your computer and use it in GitHub Desktop.
Save PotHix/e78c75281d4f1c508e89 to your computer and use it in GitHub Desktop.
A simple script to convert a WordPress export file into Middleman blog-style posts
require "nokogiri"
require "upmark"
require "time"
# Lower-cased comment author names that must never be credited in the
# "Old comments by" footer. Frozen so the list cannot be mutated by accident.
BLACKLIST = ["pothix", "ilmo francisco m. melo"].freeze
# Collects the lower-cased text of every <category> child of +element+
# whose "domain" attribute is "post_tag".
#
# element - an object answering #xpath (the script passes a Nokogiri <item> node).
#
# Returns an Array of Strings, in document order; empty when there are no tags.
def tags_for(element)
  tag_nodes = element.xpath("category").select do |node|
    node["domain"] == "post_tag"
  end
  tag_nodes.map { |node| node.text.downcase }
end
# Finds the first <category> child of +element+ whose "domain" attribute is
# "category" and returns its text lower-cased.
#
# element - an object answering #xpath (the script passes a Nokogiri <item> node).
#
# Returns the category name as a String, or nil when the item has no such
# child (previously this case raised NoMethodError on nil and aborted the run).
def category_for(element)
  category_node = element.xpath("category").find do |cat|
    cat["domain"] == "category"
  end
  # Guard against items that carry no category at all.
  category_node && category_node.text.downcase
end
# Lists the author names of the <wp:comment> children of +element+ that
# should be credited: pingbacks are dropped, and so is any author whose
# lower-cased name appears in BLACKLIST.
#
# element - an object answering #xpath (the script passes a Nokogiri <item> node).
#
# Returns an Array of Strings in document order; repeated authors are kept.
def comments_authors_for(element)
  credited = []
  element.xpath("wp:comment").each do |node|
    kind = node.at_xpath("wp:comment_type").text
    author = node.at_xpath("wp:comment_author").text
    next if kind == "pingback"
    next if BLACKLIST.include?(author.downcase)
    credited << author
  end
  credited
end
# Escapes +text+ for use inside a double-quoted YAML scalar by putting a
# backslash in front of every backslash and double quote.
def yaml_double_quoted(text)
  text.gsub(/["\\]/) { |ch| "\\#{ch}" }
end

# Main conversion: reads the WordPress export and writes one markdown file
# per <item> into posts/, named "<YYYY-MM-DD>-<post_name>.markdown".
#
# The export path may be given as the first command-line argument; without
# one the original hard-coded file name is used.
export_path = ARGV[0] || "pothix.wordpress.2015-05-25.xml"

File.open(export_path) do |file|
  # Make sure the output directory exists before the first write.
  Dir.mkdir("posts") unless Dir.exist?("posts")

  items = Nokogiri::XML(file).xpath("//channel//item")
  items.each do |article|
    com_authors = comments_authors_for article
    tags = tags_for article
    category = category_for article

    # Footer crediting the old commenters; stays nil when there are none,
    # which interpolates as an empty string below.
    comments_str = nil
    unless com_authors.empty?
      comments_str = "\n\n\n\n_Old comments by: #{com_authors.join(", ")} | Not available anymore. :(_"
    end

    name = article.at_xpath("wp:post_name").text.strip

    # The GMT timestamp has no zone marker, so append one before parsing.
    published_at = Time.parse(article.at_xpath("wp:post_date_gmt").text + " +0000").utc
    date_str = published_at.strftime("%Y-%m-%d")
    filename = "#{date_str}-#{name}.markdown"

    # Convert the HTML body to markdown. If the conversion fails, report it
    # and keep the original HTML instead of writing an empty post.
    raw_html = nil
    content =
      begin
        raw_html = article.at_xpath("content:encoded").text
        Upmark.convert(raw_html)
      rescue => e
        puts "error given when parsing '#{name}'. Error: #{e.inspect}"
        raw_html
      end

    # "[Foo] bar" becomes "Foo: bar"; the result is then escaped so a quote or
    # backslash in the title cannot break the YAML front matter.
    title = article.at_xpath("title").text
    safe_title = yaml_double_quoted(title.gsub(/\[(.*)\]/, '\1:'))

    post = [
      "---",
      "title: \"#{safe_title}\"",
      "date: \"#{date_str}\"",
      "tags: \"#{tags.join(",")}\"",
      "category: \"#{category}\"",
      "---",
      "#{content}#{comments_str}"
    ].join("\n") + "\n"

    File.open(File.join("posts", filename), "w") { |f| f.puts post }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment