Last active
December 10, 2015 07:09
-
-
Save jackboberg/4399275 to your computer and use it in GitHub Desktop.
Convert wordpress.xml to rails models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory prefix (note the trailing slash) for the WordPress export and the
# Marshal cache files; file names are appended to it with plain string `+`.
@cache_dir = 'lib/assets/'
# Returns the WordPress export as a Hash, parsing the XML only once.
#
# Reads `published.xml` from @cache_dir, converts it with Hash.from_xml and
# stores the result as a Marshal dump in `published.cache.rb`. Later calls
# load that dump instead of re-parsing the XML.
#
# @return [Hash] the parsed export, on both a cache miss and a cache hit
def hash_from_cache
  xml = @cache_dir + 'published.xml'
  cache = @cache_dir + 'published.cache.rb'

  # Cache hit: the dump is a file this script wrote itself. Marshal.load must
  # never be pointed at data from an untrusted source.
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  return File.open(cache, 'rb') { |io| Marshal.load(io) } if File.exist?(cache)

  hash = Hash.from_xml(File.read(xml))
  File.open(cache, 'wb') { |io| Marshal.dump(hash, io) }
  # Return the hash explicitly: File.open yields the block's value, which for
  # Marshal.dump(obj, io) is the IO object, not the data.
  hash
end
# -------------------------------------------------------------------- | |
# post methods | |
# Selects the export items that still have more than one category once the
# 'Aside' and 'Main' categories are ignored.
#
# The input is not modified: ignored categories are dropped from a copy.
#
# @param hash [Hash] parsed export; items are read from
#   hash['rss']['channel']['item']
# @return [Array<Hash>] the matching items, in their original order
def get_tagged_posts(hash)
  items = hash['rss']['channel']['item']
  # Presumably a lone <item> parses to a bare Hash rather than an Array, the
  # same way a lone category shows up as a String - verify against real data.
  items = [items] if items.is_a?(Hash)

  Array(items).select do |item|
    # Array() covers a missing (nil), single (String) or repeated (Array)
    # category; `-` builds a new array, leaving item['category'] untouched.
    categories = Array(item['category']) - %w[Aside Main]
    # NOTE(review): the threshold is "more than one" remaining category, as in
    # the original code - confirm that "at least one" was not intended.
    categories.length > 1
  end
end
# Builds the article body from a post's first 'encoded' entry.
#
# Strips HTML comments, rewrites [caption]...[/caption] shortcodes as
# <figure>/<figcaption> markup and wraps an image that starts a line in a
# <figure>. The post's own string is left untouched (a new string is
# returned), so other readers of post['encoded'] see the original content.
#
# @param post [Hash] export item; post['encoded'] may be an Array, a single
#   String or missing
# @return [String] the converted content ("" when the post has none)
def get_formated_content(post)
  # TODO
  # convert first photo to Model
  # wrap with <p>'s

  # Array() keeps a lone String intact instead of indexing into it.
  content = Array(post['encoded']).first.to_s

  # remove comments
  # Matches a literal "<!--" up to the nearest "-->" (optionally with spaces
  # before ">"), across lines. A character class such as [<!\-\-] would match
  # a single character and swallow ordinary markup preceding a comment.
  comment_regex = /<!--.*?--\s*>/m

  # convert caption blocks to real HTML
  caption_regex = /\[caption .*\](?<image><(a|strong|img)(.*)>)(?<caption>(.*))\[\/caption\]/
  caption_replace = '<figure>\k<image><figcaption>\k<caption></figcaption></figure>'

  # wrap an image (optionally inside <a>/<strong>) that begins a line
  image_regex = /^(?<image>(<(a |strong)(.*)>)?<img (.*)\/>(<\/(a|strong)>)?)/
  image_replace = '<figure>\k<image></figure>'

  # Non-bang gsub: never mutates the caller's string and works on frozen ones.
  content
    .gsub(comment_regex, '')
    .gsub(caption_regex, caption_replace)
    .gsub(image_regex, image_replace)
end
# Returns the `src` of the first <img> tag in a post's first 'encoded' entry.
#
# @param post [Hash] export item; post['encoded'] may be an Array, a single
#   String or missing
# @return [String, nil] the image URL, or nil when the content has no image
def get_photo(post)
  # [^>]*? keeps the match inside one tag and stops at the first src
  # attribute. A greedy ".+" would run on to the LAST src on the line.
  first_photo_regex = /<img\b[^>]*?\ssrc=['"]([^'"]+)['"]/
  Array(post['encoded']).first.to_s[first_photo_regex, 1]
end
# Builds the tag list for a post from its categories.
#
# Drops the 'Aside' and 'Main' categories, splits combined 'foo + bar'
# entries into separate tags and appends the 'wp-import' marker tag. The
# post's own category data is not modified.
#
# @param post [Hash] export item; post['category'] may be an Array, a single
#   String or missing
# @return [Array<String>] the first part of every category in original
#   order, then the split-off parts, then 'wp-import'
def get_tags(post)
  # remove useless tags
  tags = Array(post['category']) - %w[Aside Main]

  # convert 'foo + bar' into separate tags; an empty category yields no
  # parts and is dropped rather than turning into a nil tag
  pieces = tags.map { |tag| tag.split(' + ') }.reject(&:empty?)

  pieces.map(&:first) + pieces.flat_map { |bits| bits.drop(1) } + ['wp-import']
end
# -------------------------------------------------------------------- | |
# author methods | |
# Returns the author entries of the parsed export as an Array.
#
# @param hash [Hash] parsed export; authors are read from
#   hash['rss']['channel']['author']
# @return [Array<Hash>] one Hash per author; empty when the export has none
def get_authors(hash)
  authors = hash['rss']['channel']['author']
  # Presumably a lone author parses to a bare Hash - wrap it so callers can
  # always iterate. Array() on a Hash would split it into key/value pairs.
  authors.is_a?(Hash) ? [authors] : Array(authors)
end
require 'securerandom'

# Generates a random password made of ASCII letters (a-z, A-Z).
#
# Characters are drawn with SecureRandom, since the result is used as a real
# account password and Kernel#rand is not a cryptographic source.
#
# @param len [Integer] number of characters to generate
# @return [String] the password; "" when len is zero or negative
def generate_password(len=10)
  letters = [*'a'..'z', *'A'..'Z']
  (0...len).map { letters[SecureRandom.random_number(letters.length)] }.join
end
# -------------------------------------------------------------------- | |
# Rake tasks that import a WordPress export into the application's models.
# Run `wordpress:cache` first; `wordpress:authors` and `wordpress:posts` read
# the Marshal caches it writes under @cache_dir.
namespace :wordpress do
  # Parses the export and writes the author list and the tagged posts to
  # their own cache files.
  task :cache => :environment do
    hash = hash_from_cache

    # write authors to cache
    authors = get_authors(hash)
    File.open(@cache_dir + 'authors.cache.rb', 'wb') { |io| Marshal.dump(authors, io) }

    # write posts to cache
    posts = get_tagged_posts(hash)
    File.open(@cache_dir + 'posts.cache.rb', 'wb') { |io| Marshal.dump(posts, io) }
  end

  # Creates a User with a random password and the :author role for every
  # cached author entry.
  task :authors => :environment do
    cache = @cache_dir + 'authors.cache.rb'
    # The cache is a file written by wordpress:cache; never Marshal.load
    # data from an untrusted source.
    authors = File.open(cache, 'rb') { |io| Marshal.load(io) }

    authors.each do |attrs|
      password = generate_password
      author = User.create(
        email: attrs['author_email'],
        username: attrs['author_login'],
        password: password,
        password_confirmation: password,
        profile_attributes: {
          forename: attrs['author_first_name'],
          surname: attrs['author_last_name']
        }
      )

      # create returns the record even when it could not be saved; report
      # that instead of assigning a role to an unsaved user.
      unless author.persisted?
        warn "Could not create user #{attrs['author_login'].inspect}: " \
             "#{author.errors.full_messages.join(', ')}"
        next
      end

      author.add_role :author
    end
  end

  # Creates an Article for every cached post, owned by the user whose
  # username equals the post's 'creator', then prints the article count.
  task :posts => :environment do
    cache = @cache_dir + 'posts.cache.rb'
    posts = File.open(cache, 'rb') { |io| Marshal.load(io) }

    posts.each do |post|
      author = User.find_by_username(post['creator'])

      # A post whose creator was never imported would otherwise abort the
      # whole run with a NoMethodError on nil.
      unless author
        warn "Skipping post #{post['title'].inspect}: no user named #{post['creator'].inspect}"
        next
      end

      author.articles.create(
        title: post['title'],
        content: get_formated_content(post),
        published_at: post['pubDate'],
        tag_list: get_tags(post)
      )
    end

    puts Article.count
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment