-
-
Save evanwalsh/6131008 to your computer and use it in GitHub Desktop.
| # coding: utf-8 | |
| require 'rubygems' | |
| require 'hpricot' | |
| require 'fileutils' | |
| require 'safe_yaml' | |
| require 'time' | |
module JekyllImport
  # This importer takes a wordpress.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Imports every <item> of the export file into the current working
    # directory. Each item is written to "_<post_type>s/<date>-<slug>.html"
    # as a YAML header, a '---' separator line and the item's
    # <content:encoded> text. A per-type summary is printed at the end.
    #
    # filename - Hash whose :source entry is the path of the export file
    #            (the parameter name is kept for backward compatibility).
    #
    # Items that cannot be written are reported on stdout and skipped.
    def self.process(filename = {:source => "wordpress.xml"})
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename[:source]))

      (doc/:channel/:item).each do |item|
        title = item.at(:title).inner_text.strip
        permalink_title = item.at('wp:post_name').inner_text.gsub("/", "-")
        # Fallback to "prettified" title if post_name is empty (can happen)
        permalink_title = sluggify(title) if permalink_title.empty?

        date = parse_date(item)
        status = item.at('wp:status').inner_text
        type = item.at('wp:post_type').inner_text
        categories = category_names(item, 'category').reject { |c| c == 'Uncategorized' }
        tags = category_names(item, 'post_tag')

        name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"

        header = {
          'layout' => type,
          'title' => title,
          'categories' => categories,
          'tags' => tags,
          'status' => status,
          'type' => type,
          'published' => status == "publish",
          'meta' => collect_meta(item)
        }

        begin
          # Build everything that can fail *before* opening the file for
          # writing, so an error never leaves a truncated file behind.
          front_matter = header.to_yaml
          content = item.at('content:encoded').inner_text

          FileUtils.mkdir_p "_#{type}s"
          File.open("_#{type}s/#{name}", "w") do |f|
            f.puts front_matter
            f.puts '---'
            f.puts content
          end
        rescue => e
          puts "Couldn't import post!"
          puts "Title: #{title}"
          puts "Name/Slug: #{name}\n"
          puts "Error: #{e.message}"
          next
        end

        import_count[type] += 1
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end

    # Turns a title into a lowercase, hyphen-separated slug. Every run of
    # non-alphanumeric characters becomes a single '-', and hyphens at the
    # very start or end are removed so that "Hello, World!" yields
    # "hello-world" rather than "hello-world-".
    def self.sluggify(title)
      title.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-+|-+\z/, '').downcase
    end

    # Returns the item's <wp:post_date> as a Time, or the current time when
    # the element is absent or its text cannot be parsed.
    def self.parse_date(item)
      node = item.at('wp:post_date')
      return Time.now unless node

      begin
        Time.parse(node.inner_text)
      rescue ArgumentError
        Time.now
      end
    end

    # Collects the item's <wp:postmeta> entries into a key => value Hash.
    def self.collect_meta(item)
      metas = {}
      item.search("wp:postmeta").each do |meta|
        metas[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
      end
      metas
    end

    # Returns the unique texts of the item's <category> elements whose
    # "domain" attribute equals +domain+.
    def self.category_names(item, domain)
      item.search(%(category[@domain="#{domain}"])).map { |c| c.inner_text }.uniq
    end

    private_class_method :parse_date, :collect_meta, :category_names
  end
end
# Run the import as soon as this file is executed, using the default source file ("wordpress.xml").
| JekyllImport::WordpressDotCom.process |
| #!/usr/bin/env ruby | |
| require 'html2markdown' | |
# Matches Jekyll-style post file names such as "2013-08-02-my-post.html".
# Anchored with \A/\z and with the dot escaped, so only a whole file name that
# ends in a literal ".html" matches.
POST_REGEX = %r{\A(?<year>[0-9]+)-(?<month>[0-9]+)-(?<day>[0-9]+)-(?<title>.*)\.html\z}

# Convert every matching HTML post in the current directory to Markdown and
# replace "<name>.html" with "<name>.md".
Dir.glob('*.html').each do |post|
  data = post.match(POST_REGEX)
  next unless data

  # Convert before touching the file system: opening the source with mode 'w'
  # first would truncate it, and a conversion error would then lose the post.
  markdown = HTMLPage.new(contents: File.read(post)).markdown

  target = "#{data[:year]}-#{data[:month]}-#{data[:day]}-#{data[:title]}.md"
  File.open(target, 'w') { |f| f.puts markdown }
  # Remove the HTML original only after the Markdown copy has been written.
  File.delete(post)
end
To use this, create new files in the root of your Jekyll directory with the names and contents from above.
This was my folder structure when I ran the import:
.
├── CNAME
├── Gemfile
├── Gemfile.lock
├── Squarespace-Wordpress-Export-05-09-2017_wordpress.xml
├── _attachments
├── _config.yml
├── _pages
├── _posts
├── _site
├── about.md
├── import.rb
├── index.md
└── rename.rb
Copy and paste the contents of the two files above into their respective locations.
Then, in import.rb, on line 29, change it to:
def self.process(filename = {:source => "<YOUR FILE HERE>"})
I'm no expert, but this might help some others who are trying to use this process.
Oh, and I used the fork created by @spiffytech to import images. It worked like a dream. (I hit a few broken image links, and just manually deleted them from the wordpress XML file, and re-ran the import.)
Finally, to run the whole thing, just type `ruby import.rb` in your command line.
Good luck!
Hi - great importer, thank you very much. One question: it does not seem to import multiple authors. Is there a way to change this?
Thanks.
Worked like a charm. Thank you.
Thank you! It helped a lot!
I would love to use this, but as a noob I don't know how. Can someone give me a rundown as to how to use this? Thanks to anyone who helps.