Created
October 14, 2009 20:14
-
-
Save czottmann/210359 to your computer and use it in GitHub Desktop.
Parses a WordPress XML export file and imports the posts into Tumblr. See http://blog.zottmann.org/post/213103984/the-big-blog-move-on-2009 for info.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/ruby
require "hpricot"
require "net/http"
require "uri"

# Tumblr account credentials, sent as :email / :password with every write
# call. Both values are placeholders to be replaced before running.
TUMBLR_USER = "[email protected]"
TUMBLR_PASS = "thepasswordlol"

# Sent as the :group parameter of every write call.
TUMBLR_DOMAIN = "mytumblrsubdomain.tumblr.com"

# Base URL of the old blog. Only referenced by the commented-out fallback
# that rebuilds a post link when the export's <link> element is empty.
ORIGINAL_DOMAIN = "http://myoriginaldomain.example.com/"

# Log of imported posts, one "<original post URL>;<Tumblr response body>"
# entry per line. It is read at start-up to skip posts that were already
# ported and appended to after each upload.
FILE_URLS_TO_TIDS = "old-new.txt"
# URLs of posts imported by a previous run. Each line of FILE_URLS_TO_TIDS
# has the form "<original post URL>;<Tumblr response body>"; only the URL
# part is kept. The log does not exist before the first run, and blank lines
# or lines without a ";" carry no URL, so both cases are skipped instead of
# raising.
ported_urls =
  if File.exist?(FILE_URLS_TO_TIDS)
    File.readlines(FILE_URLS_TO_TIDS).map { |line| line.strip[/^(.*);/, 1] }.compact
  else
    []
  end

# Parse the WordPress export. The block form of File.open closes the file
# handle as soon as Hpricot has built the document.
doc = File.open("wordpress.2009-10-10.xml") { |file| Hpricot(file) }
# Upload every not-yet-ported WordPress post to Tumblr and append
# "<original post URL>;<Tumblr response body>" to FILE_URLS_TO_TIDS for each
# upload that succeeded.
(doc/"item").each do |item|
  # Items whose wp:post_type is not "post" are not imported.
  next if item.search("wp:post_type").first.inner_text != "post"

  link = item.search("link").first.inner_text
  # link = ORIGINAL_DOMAIN + item.search("wp:post_date_gmt").first.inner_text[0, 10].gsub(/-/, "/") + "/" + item.search("wp:post_name").first.inner_text + "/" # do this if the link is empty
  next if ported_urls.include?(link)

  title      = item.search("title").first.inner_text
  postdate   = item.search("wp:post_date").first.inner_text
  is_private = ( item.search("wp:status").first.inner_text == "private" )
  tags       = item.search("category[@domain='tag']").collect(&:inner_text).uniq

  # Approved comments vs. pingbacks: the Disqus snippet is only appended when
  # there are more approved comments than pingbacks.
  comments     = item.search("wp:comment_approved").select {|ct| ct.inner_text == "1" }.size
  pingbacks    = item.search("wp:comment_type").select {|ct| ct.inner_text == "pingback" }.size
  has_comments = ( comments > 0 && comments > pingbacks )

  content = item.search("content:encoded").first.inner_text
  # My WP blog was using Disqus. I'd like to retain the comments.
  content += "\n\n<script type='text/javascript'>var disqus_url = '#{link}';</script>" if has_comments
  next if content.strip.empty?

  puts "- #{title} [#{comments}/#{pingbacks}]"
  puts " #{link}"

  # NOTE(review): the credentials travel over plain HTTP here, and a reader
  # comment on the gist reports that Tumblr API v1 has since been shut
  # down - confirm the endpoint before relying on this call.
  response = Net::HTTP.post_form(
    URI.parse("http://www.tumblr.com/api/write"),
    {
      :email    => TUMBLR_USER,
      :password => TUMBLR_PASS,
      :type     => "regular",
      :date     => postdate,
      :title    => title,
      :body     => content,
      :tags     => tags.join(","),
      :format   => "markdown",
      :group    => TUMBLR_DOMAIN,
      :private  => is_private ? 1 : 0
    }
  )

  # Only a 2xx response means the post was created. Logging a failed upload
  # would store the error text as the Tumblr ID and make every later run
  # skip this post, so report the failure and leave the post for a retry.
  unless response.is_a?(Net::HTTPSuccess)
    warn " ! upload failed (#{response.code} #{response.message}): #{response.body}"
    next
  end

  tumblr_id = response.body
  # The block form of File.open closes the log file itself.
  File.open(FILE_URLS_TO_TIDS, "a") do |f|
    f.puts "#{link};#{tumblr_id}"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Do you have a version of this that works with API v2? There are no scripts out there at all to export from WordPress to Tumblr, so it'd be great if you could modify this to work with API v2! This script doesn't work anymore due to API v1 being shut down :(