jackboberg · December 10, 2015 07:09
diff --git a/wordpress.rake b/wordpress.rake
 # Directory prefix (relative path, trailing slash included) for published.xml
 # and the Marshal cache files; file names are appended to it directly.
 @cache_dir = 'lib/assets/'

 # Loads the parsed WordPress export, building the Marshal cache on first use.
 #
 # Reads `published.xml` from @cache_dir, parses it with Hash.from_xml and
 # stores the result in `published.cache.rb`; later calls read that cache file
 # instead of parsing the XML again.
 #
 # @return [Hash] the parsed export
 def hash_from_cache
   xml = @cache_dir + 'published.xml'
   cache = @cache_dir + 'published.cache.rb'

   # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
   if File.exist?(cache)
     # NOTE: Marshal.load is only acceptable because this file is written by
     # the code below; never point it at a file from an untrusted source.
     return File.open(cache, 'rb') { |io| Marshal.load(io) }
   end

   hash = Hash.from_xml(File.read(xml))
   File.open(cache, 'wb') { |io| Marshal.dump(hash, io) }
   # Marshal.dump(obj, io) returns the IO, so the hash has to be returned
   # explicitly rather than relying on the value of the File.open block.
   hash
 end

 # --------------------------------------------------------------------
 # post methods

 # Selects the feed items that carry more than one tag besides 'Aside'/'Main'.
 #
 # An item's 'category' may be missing, a single String or an Array of
 # Strings. The items themselves are not modified.
 #
 # @param hash [Hash] parsed export; items are read from hash['rss']['channel']['item']
 # @return [Array] the matching items
 def get_tagged_posts(hash)
   ignored = ['Aside', 'Main']
   hash['rss']['channel']['item'].select do |item|
     # Array() turns nil into [] and wraps a lone String; subtracting instead
     # of delete_at keeps the item's own category list intact.
     (Array(item['category']) - ignored).length > 1
   end
 end

 # Converts a post's WordPress markup into plain HTML.
 #
 # Works on the first entry of post['encoded']: strips HTML comments, turns
 # [caption] shortcodes into <figure>/<figcaption> markup and wraps images
 # that start a line in <figure>. The post itself is left untouched.
 #
 # @param post [Hash] feed item; 'encoded' may be a String or an Array of Strings
 # @return [String] the converted content ('' when there is none)
 def get_formated_content(post)
   # TODO
   # convert first photo to Model
   # wrap with <p>'s

   # remove comments
   # Literal comment delimiters: character classes here would match single
   # characters and also eat ordinary text such as "! a ->".
   comment_regex = /<!--.*?--\s*>/m

   # convert caption blocks to real HTML
   # Nothing in the match may run past a [/caption], so several captions on
   # one line are converted one by one instead of being merged into one.
   caption_regex = %r{\[caption (?:(?!\[/caption\]).)*?\](?<image><(?:a|strong|img)(?:(?!\[/caption\]).)*>)(?<caption>(?:(?!\[/caption\]).)*)\[/caption\]}
   caption_replace = '<figure>\k<image><figcaption>\k<caption></figcaption></figure>'

   image_regex = /^(?<image>(<(a |strong)(.*)>)?<img (.*)\/>(<\/(a|strong)>)?)/
   image_replace = '<figure>\k<image></figure>'

   # Non-bang gsub returns new strings, so the caller's post is not mutated.
   content = Array(post['encoded']).first.to_s
   content = content.gsub(comment_regex, '')
   content = content.gsub(caption_regex, caption_replace)
   content.gsub(image_regex, image_replace)
 end

 # Returns the src of the first <img> tag in a post's content.
 #
 # @param post [Hash] feed item; the content is the first entry of post['encoded']
 # @return [String, nil] the image URL, or nil when the content has no image
 def get_photo(post)
   # [^>]*? keeps the match inside one tag so the first image wins; a greedy
   # `.+` would skip ahead to the last src= on the line.
   first_photo_regex = /<img\b[^>]*?\ssrc=['"]([^'"]+)['"]/
   Array(post['encoded']).first.to_s[first_photo_regex, 1]
 end

 # Builds the tag list for an imported post.
 #
 # Drops the 'Aside' and 'Main' categories, splits combined 'foo + bar'
 # categories into separate tags and appends the 'wp-import' marker tag.
 # A new Array is returned; post['category'] is not modified, so calling this
 # twice for the same post gives the same result.
 #
 # @param post [Hash] feed item; 'category' may be nil, a String or an Array
 # @return [Array<String>] the tags, ending with 'wp-import'
 def get_tags(post)
   # remove useless tags
   ignored = ['Aside', 'Main']
   tags = Array(post['category']) - ignored

   # convert 'foo + bar' into separate tags
   tags.flat_map { |tag| tag.split(' + ') } << 'wp-import'
 end

 # --------------------------------------------------------------------
 # author methods

 # Returns the authors listed in the parsed export, always as an Array.
 #
 # A lone author entry arrives as a single Hash and a missing entry as nil;
 # both are normalised so callers can iterate the result directly.
 #
 # @param hash [Hash] parsed export; authors are read from hash['rss']['channel']['author']
 # @return [Array] the author entries (empty when there are none)
 def get_authors(hash)
   authors = hash['rss']['channel']['author']
   # Array(some_hash) would explode the Hash into key/value pairs, so a single
   # author is wrapped by hand.
   authors.is_a?(Hash) ? [authors] : Array(authors)
 end

 # Generates a random password made of ASCII letters (a-z, A-Z).
 #
 # @param len [Integer] number of characters
 # @return [String] the password ('' when len is zero or negative)
 def generate_password(len = 10)
   # Loaded here because the file has no require block; SecureRandom draws from
   # the OS CSPRNG, unlike Kernel#rand which is predictable.
   require 'securerandom'

   letters = ('a'..'z').to_a + ('A'..'Z').to_a
   (0...len).map { letters[SecureRandom.random_number(letters.length)] }.join
 end

 # --------------------------------------------------------------------

 # Rake tasks that import a WordPress export into the application.
 # :cache writes the authors/posts cache files that :authors and :posts read,
 # so :cache has to be run before either of them.
 namespace :wordpress do

   # Splits the parsed export into an authors cache and a posts cache.
   task :cache => :environment do
     hash = hash_from_cache

     # write authors to cache
     authors = get_authors(hash)
     cache = @cache_dir + 'authors.cache.rb'
     File.open(cache, "wb") { |io| Marshal.dump(authors, io) }

     # write posts to cache
     posts = get_tagged_posts(hash)
     cache = @cache_dir + 'posts.cache.rb'
     File.open(cache, "wb") { |io| Marshal.dump(posts, io) }
   end


   # Creates a User with the :author role for every cached author. Each user
   # gets a random password that is not recorded anywhere.
   task :authors => :environment do
     cache = @cache_dir + 'authors.cache.rb'
     authors = File.open(cache, "rb") { |io| Marshal.load(io) }

     authors.each do |a|
       password = generate_password
       author = User.create(
         email: a['author_email'],
         username: a['author_login'],
         password: password,
         password_confirmation: password,
         profile_attributes: {
           forename: a['author_first_name'],
           surname: a['author_last_name']
         }
       )
       # NOTE(review): assumes User is an ActiveRecord model, where create
       # returns an unsaved record when validation fails - report it instead
       # of assigning a role to a record that was never stored.
       unless author.persisted?
         warn "Skipping author #{a['author_login']}: #{author.errors.full_messages.join(', ')}"
         next
       end
       author.add_role :author
     end
   end

   # Creates an Article for every cached post under the user whose username
   # equals the post's 'creator', then prints the total Article count.
   task :posts => :environment do
     cache = @cache_dir + 'posts.cache.rb'
     posts = File.open(cache, "rb") { |io| Marshal.load(io) }

     posts.each do |post|
       author = User.find_by_username(post['creator'])
       # No matching user: skip the post rather than crash on nil.articles.
       unless author
         warn "Skipping post #{post['title'].inspect}: no user named #{post['creator'].inspect}"
         next
       end
       author.articles.create(
         title: post['title'],
         content: get_formated_content(post),
         published_at: post['pubDate'],
         tag_list: get_tags(post)
       )
     end
     puts Article.count
   end

 end
	# Directory prefix (relative path, trailing slash included) for published.xml
	# and the Marshal cache files; file names are appended to it directly.
	@cache_dir = 'lib/assets/'

	# Loads the parsed WordPress export, building the Marshal cache on first use.
	#
	# Reads `published.xml` from @cache_dir, parses it with Hash.from_xml and
	# stores the result in `published.cache.rb`; later calls read that cache file
	# instead of parsing the XML again.
	#
	# @return [Hash] the parsed export
	def hash_from_cache
	  xml = @cache_dir + 'published.xml'
	  cache = @cache_dir + 'published.cache.rb'

	  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
	  if File.exist?(cache)
	    # NOTE: Marshal.load is only acceptable because this file is written by
	    # the code below; never point it at a file from an untrusted source.
	    return File.open(cache, 'rb') { |io| Marshal.load(io) }
	  end

	  hash = Hash.from_xml(File.read(xml))
	  File.open(cache, 'wb') { |io| Marshal.dump(hash, io) }
	  # Marshal.dump(obj, io) returns the IO, so the hash has to be returned
	  # explicitly rather than relying on the value of the File.open block.
	  hash
	end

	# --------------------------------------------------------------------
	# post methods

	# Selects the feed items that carry more than one tag besides 'Aside'/'Main'.
	#
	# An item's 'category' may be missing, a single String or an Array of
	# Strings. The items themselves are not modified.
	#
	# @param hash [Hash] parsed export; items are read from hash['rss']['channel']['item']
	# @return [Array] the matching items
	def get_tagged_posts(hash)
	  ignored = ['Aside', 'Main']
	  hash['rss']['channel']['item'].select do |item|
	    # Array() turns nil into [] and wraps a lone String; subtracting instead
	    # of delete_at keeps the item's own category list intact.
	    (Array(item['category']) - ignored).length > 1
	  end
	end

	# Converts a post's WordPress markup into plain HTML.
	#
	# Works on the first entry of post['encoded']: strips HTML comments, turns
	# [caption] shortcodes into <figure>/<figcaption> markup and wraps images
	# that start a line in <figure>. The post itself is left untouched.
	#
	# @param post [Hash] feed item; 'encoded' may be a String or an Array of Strings
	# @return [String] the converted content ('' when there is none)
	def get_formated_content(post)
	  # TODO
	  # convert first photo to Model
	  # wrap with <p>'s

	  # remove comments
	  # Literal comment delimiters: character classes here would match single
	  # characters and also eat ordinary text such as "! a ->".
	  comment_regex = /<!--.*?--\s*>/m

	  # convert caption blocks to real HTML
	  # Nothing in the match may run past a [/caption], so several captions on
	  # one line are converted one by one instead of being merged into one.
	  caption_regex = %r{\[caption (?:(?!\[/caption\]).)*?\](?<image><(?:a|strong|img)(?:(?!\[/caption\]).)*>)(?<caption>(?:(?!\[/caption\]).)*)\[/caption\]}
	  caption_replace = '<figure>\k<image><figcaption>\k<caption></figcaption></figure>'

	  image_regex = /^(?<image>(<(a |strong)(.*)>)?<img (.*)\/>(<\/(a|strong)>)?)/
	  image_replace = '<figure>\k<image></figure>'

	  # Non-bang gsub returns new strings, so the caller's post is not mutated.
	  content = Array(post['encoded']).first.to_s
	  content = content.gsub(comment_regex, '')
	  content = content.gsub(caption_regex, caption_replace)
	  content.gsub(image_regex, image_replace)
	end

	# Returns the src of the first <img> tag in a post's content.
	#
	# @param post [Hash] feed item; the content is the first entry of post['encoded']
	# @return [String, nil] the image URL, or nil when the content has no image
	def get_photo(post)
	  # [^>]*? keeps the match inside one tag so the first image wins; a greedy
	  # `.+` would skip ahead to the last src= on the line.
	  first_photo_regex = /<img\b[^>]*?\ssrc=['"]([^'"]+)['"]/
	  Array(post['encoded']).first.to_s[first_photo_regex, 1]
	end

	# Builds the tag list for an imported post.
	#
	# Drops the 'Aside' and 'Main' categories, splits combined 'foo + bar'
	# categories into separate tags and appends the 'wp-import' marker tag.
	# A new Array is returned; post['category'] is not modified, so calling this
	# twice for the same post gives the same result.
	#
	# @param post [Hash] feed item; 'category' may be nil, a String or an Array
	# @return [Array<String>] the tags, ending with 'wp-import'
	def get_tags(post)
	  # remove useless tags
	  ignored = ['Aside', 'Main']
	  tags = Array(post['category']) - ignored

	  # convert 'foo + bar' into separate tags
	  tags.flat_map { |tag| tag.split(' + ') } << 'wp-import'
	end

	# --------------------------------------------------------------------
	# author methods

	# Returns the authors listed in the parsed export, always as an Array.
	#
	# A lone author entry arrives as a single Hash and a missing entry as nil;
	# both are normalised so callers can iterate the result directly.
	#
	# @param hash [Hash] parsed export; authors are read from hash['rss']['channel']['author']
	# @return [Array] the author entries (empty when there are none)
	def get_authors(hash)
	  authors = hash['rss']['channel']['author']
	  # Array(some_hash) would explode the Hash into key/value pairs, so a single
	  # author is wrapped by hand.
	  authors.is_a?(Hash) ? [authors] : Array(authors)
	end

	# Generates a random password made of ASCII letters (a-z, A-Z).
	#
	# @param len [Integer] number of characters
	# @return [String] the password ('' when len is zero or negative)
	def generate_password(len = 10)
	  # Loaded here because the file has no require block; SecureRandom draws from
	  # the OS CSPRNG, unlike Kernel#rand which is predictable.
	  require 'securerandom'

	  letters = ('a'..'z').to_a + ('A'..'Z').to_a
	  (0...len).map { letters[SecureRandom.random_number(letters.length)] }.join
	end

	# --------------------------------------------------------------------

	# Rake tasks that import a WordPress export into the application.
	# :cache writes the authors/posts cache files that :authors and :posts read,
	# so :cache has to be run before either of them.
	namespace :wordpress do

	  # Splits the parsed export into an authors cache and a posts cache.
	  task :cache => :environment do
	    hash = hash_from_cache

	    # write authors to cache
	    authors = get_authors(hash)
	    cache = @cache_dir + 'authors.cache.rb'
	    File.open(cache, "wb") { |io| Marshal.dump(authors, io) }

	    # write posts to cache
	    posts = get_tagged_posts(hash)
	    cache = @cache_dir + 'posts.cache.rb'
	    File.open(cache, "wb") { |io| Marshal.dump(posts, io) }
	  end


	  # Creates a User with the :author role for every cached author. Each user
	  # gets a random password that is not recorded anywhere.
	  task :authors => :environment do
	    cache = @cache_dir + 'authors.cache.rb'
	    authors = File.open(cache, "rb") { |io| Marshal.load(io) }

	    authors.each do |a|
	      password = generate_password
	      author = User.create(
	        email: a['author_email'],
	        username: a['author_login'],
	        password: password,
	        password_confirmation: password,
	        profile_attributes: {
	          forename: a['author_first_name'],
	          surname: a['author_last_name']
	        }
	      )
	      # NOTE(review): assumes User is an ActiveRecord model, where create
	      # returns an unsaved record when validation fails - report it instead
	      # of assigning a role to a record that was never stored.
	      unless author.persisted?
	        warn "Skipping author #{a['author_login']}: #{author.errors.full_messages.join(', ')}"
	        next
	      end
	      author.add_role :author
	    end
	  end

	  # Creates an Article for every cached post under the user whose username
	  # equals the post's 'creator', then prints the total Article count.
	  task :posts => :environment do
	    cache = @cache_dir + 'posts.cache.rb'
	    posts = File.open(cache, "rb") { |io| Marshal.load(io) }

	    posts.each do |post|
	      author = User.find_by_username(post['creator'])
	      # No matching user: skip the post rather than crash on nil.articles.
	      unless author
	        warn "Skipping post #{post['title'].inspect}: no user named #{post['creator'].inspect}"
	        next
	      end
	      author.articles.create(
	        title: post['title'],
	        content: get_formated_content(post),
	        published_at: post['pubDate'],
	        tag_list: get_tags(post)
	      )
	    end
	    puts Article.count
	  end

	end