Skip to content

Instantly share code, notes, and snippets.

@jackboberg
Last active December 10, 2015 07:09
Show Gist options
  • Save jackboberg/4399275 to your computer and use it in GitHub Desktop.
Save jackboberg/4399275 to your computer and use it in GitHub Desktop.
Convert wordpress.xml to rails models
# Directory prefix (note the trailing slash) under which the helpers and
# tasks in this file look for 'published.xml' and read/write the
# '*.cache.rb' Marshal files.
@cache_dir = 'lib/assets/'
# Loads the parsed export, keeping a marshalled copy on disk so the XML is
# only parsed once.
#
# Cache miss: reads 'published.xml' from @cache_dir, parses it with
# Hash.from_xml (not defined in this file; presumably ActiveSupport), and
# writes the result to 'published.cache.rb'.
# Cache hit: loads 'published.cache.rb' and skips the XML entirely.
#
# Returns the parsed export as a Hash on both paths.
def hash_from_cache
  xml   = File.join(@cache_dir, 'published.xml')
  cache = File.join(@cache_dir, 'published.cache.rb')

  # NOTE(review): Marshal.load must only ever see files written by this
  # method; do not point the cache path at data from an untrusted source.
  return File.open(cache, 'rb') { |io| Marshal.load(io) } if File.exist?(cache)

  hash = Hash.from_xml(File.read(xml))
  File.open(cache, 'wb') { |io| Marshal.dump(hash, io) }
  # Marshal.dump returns the IO it wrote to, so hand back the hash explicitly.
  hash
end
# --------------------------------------------------------------------
# post methods
# Selects the export items that carry enough categories to be imported.
#
# hash - parsed export; items are read from hash['rss']['channel']['item'].
#
# An item's 'category' may be missing, a single String, or an Array of
# Strings. The 'Aside' and 'Main' categories are ignored, and an item is kept
# only when more than one category remains after ignoring them.
#
# Returns an Array of item hashes. The input hash is not modified.
def get_tagged_posts(hash)
  ignored = %w[Aside Main]
  items = hash['rss']['channel']['item']
  # An export with exactly one item presumably yields a bare Hash here
  # instead of an Array of hashes; wrap it so it is treated as one item.
  items = [items] if items.is_a?(Hash)

  Array(items).select do |item|
    # Array() covers both the missing (nil) and the single-String case, and
    # `-` builds a new array so the item's own category list stays intact.
    remaining = Array(item['category']) - ignored
    remaining.length > 1
  end
end
# Converts a post's WordPress markup into the HTML stored on the article.
#
# post - item hash; the body is the first entry of post['encoded'] (a lone
#        String value is accepted as well).
#
# Steps, in order:
#   1. strip HTML comments
#   2. turn [caption ...]...[/caption] shortcodes into <figure>/<figcaption>
#   3. wrap an image (optionally inside <a>/<strong>) that starts a line
#      in <figure>
#
# Returns a new String; the string held by the post hash is left untouched.
#
# TODO
# convert first photo to Model
# wrap with <p>'s
def get_formated_content(post)
  # Work on a copy so repeated calls always start from the raw body.
  content = Array(post['encoded']).first.to_s.dup

  # remove comments ("<!-- ... -->", possibly spanning several lines)
  comment_regex = /<!--.*?--\s*>/m
  content = content.gsub(comment_regex, '')

  # convert caption blocks to real HTML
  # NOTE(review): the `.*` pieces are greedy, so this expects at most one
  # caption block per line.
  caption_regex = /\[caption .*\](?<image><(a|strong|img)(.*)>)(?<caption>(.*))\[\/caption\]/
  caption_replace = '<figure>\k<image><figcaption>\k<caption></figcaption></figure>'
  content = content.gsub(caption_regex, caption_replace)

  # wrap images that open a line; `^` is intentionally per-line here
  image_regex = /^(?<image>(<(a |strong)(.*)>)?<img (.*)\/>(<\/(a|strong)>)?)/
  image_replace = '<figure>\k<image></figure>'
  content.gsub(image_regex, image_replace)
end
# Finds the URL of the first image in a post body.
#
# post - item hash; the body is the first entry of post['encoded'] (a lone
#        String value is accepted as well).
#
# Returns the src attribute value of the first <img> tag, or nil when the
# body contains no <img> with a quoted src.
def get_photo(post)
  # [^>]*? keeps the match inside one tag and stops at its first src=, so
  # the first <img> wins even when several images share a line.
  first_photo_regex = /<img\b[^>]*?\ssrc=['"]([^'"]+)['"]/
  Array(post['encoded']).first.to_s[first_photo_regex, 1]
end
# Builds the tag list for a post from its WordPress categories.
#
# post - item hash; 'category' may be missing, a single String, or an Array
#        of Strings.
#
# The 'Aside' and 'Main' categories are dropped, a combined category such as
# 'foo + bar' is split into separate tags, and 'wp-import' is appended.
# Split-off parts are placed after all leading parts, e.g.
# ['foo + bar', 'baz'] becomes ['foo', 'baz', 'bar', 'wp-import'].
#
# Returns a new Array of Strings; post['category'] is not modified, so the
# method can be called repeatedly with the same result.
def get_tags(post)
  ignored = %w[Aside Main]

  # remove useless tags
  tags = Array(post['category']).reject { |tag| ignored.include?(tag) }

  # convert 'foo + bar' into separate tags
  leading = []
  extras = []
  tags.each do |tag|
    first, *rest = tag.split(' + ')
    leading << first
    extras.concat(rest)
  end

  # Splitting a blank or separator-only category yields nil/empty parts;
  # those are never useful tags.
  cleaned = (leading + extras).reject { |tag| tag.nil? || tag.empty? }
  cleaned << 'wp-import'
end
# --------------------------------------------------------------------
# author methods
# Extracts the author records from the parsed export.
#
# hash - parsed export; authors are read from hash['rss']['channel']['author'].
#
# Returns an Array of author hashes. A lone author (which presumably arrives
# as a bare Hash rather than a one-element Array) is wrapped, and a missing
# entry yields an empty Array, so callers can always iterate the result.
def get_authors(hash)
  authors = hash['rss']['channel']['author']
  # Array(a_hash) would explode the hash into key/value pairs, so wrap it
  # explicitly.
  authors.is_a?(Hash) ? [authors] : Array(authors)
end
require 'securerandom'

# Generates a random password made of ASCII letters.
#
# len - number of characters to generate (default 10); zero or a negative
#       value yields an empty String.
#
# Characters are drawn with SecureRandom rather than Kernel#rand because the
# result is used as an account password.
#
# Returns a String of `len` characters from a-z and A-Z.
def generate_password(len=10)
  letters = [*('a'..'z'), *('A'..'Z')]
  len.times.map { letters[SecureRandom.random_number(letters.length)] }.join
end
# --------------------------------------------------------------------
# Rake tasks that import a WordPress export into the application's models.
#
#   wordpress:cache   - parse the export and write the authors/posts caches
#   wordpress:authors - create a User for every cached author
#   wordpress:posts   - create an article for every cached post
#
# The authors and posts tasks read the files written by wordpress:cache, and
# the posts task looks each post's author up by username, so the tasks are
# meant to be run in the order listed.
#
# NOTE(review): User and Article are defined outside this file; the code
# below assumes they are ActiveRecord models (create / persisted? / errors).
# NOTE(review): the cache files are read with Marshal.load, which is only
# safe because wordpress:cache writes them itself.
namespace :wordpress do
  task :cache => :environment do
    hash = hash_from_cache

    # write authors to cache
    authors = get_authors(hash)
    cache = File.join(@cache_dir, 'authors.cache.rb')
    File.open(cache, 'wb') { |io| Marshal.dump(authors, io) }

    # write posts to cache
    posts = get_tagged_posts(hash)
    cache = File.join(@cache_dir, 'posts.cache.rb')
    File.open(cache, 'wb') { |io| Marshal.dump(posts, io) }
  end

  task :authors => :environment do
    cache = File.join(@cache_dir, 'authors.cache.rb')
    authors = File.open(cache, 'rb') { |io| Marshal.load(io) }

    authors.each do |a|
      # The generated password is never printed or stored in clear text, so
      # imported authors presumably need a password reset to sign in.
      password = generate_password
      author = User.create(
        email: a['author_email'],
        username: a['author_login'],
        password: password,
        password_confirmation: password,
        profile_attributes: {
          forename: a['author_first_name'],
          surname: a['author_last_name']
        }
      )

      # Report records that failed to save instead of silently carrying on
      # and assigning a role to an unsaved user.
      unless author.persisted?
        warn "Could not create user #{a['author_login'].inspect}: " \
             "#{author.errors.full_messages.join(', ')}"
        next
      end

      author.add_role :author
    end
  end

  task :posts => :environment do
    cache = File.join(@cache_dir, 'posts.cache.rb')
    posts = File.open(cache, 'rb') { |io| Marshal.load(io) }

    posts.each do |post|
      author = User.find_by_username(post['creator'])

      # A post whose creator was never imported would otherwise abort the
      # whole run with a NoMethodError on nil.
      if author.nil?
        warn "Skipping #{post['title'].inspect}: " \
             "no user with username #{post['creator'].inspect}"
        next
      end

      article = author.articles.create(
        title: post['title'],
        content: get_formated_content(post),
        published_at: post['pubDate'],
        tag_list: get_tags(post)
      )

      unless article.persisted?
        warn "Could not create article #{post['title'].inspect}: " \
             "#{article.errors.full_messages.join(', ')}"
      end
    end

    puts Article.count
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment