Skip to content

Instantly share code, notes, and snippets.

@anselm
Created November 18, 2015 05:26
Show Gist options
  • Save anselm/4330c9bb0db6a9b079f6 to your computer and use it in GitHub Desktop.
Save anselm/4330c9bb0db6a9b079f6 to your computer and use it in GitHub Desktop.
Converts Jim Hightower's site from Drupal to WordPress without having to log into the Drupal site. Generates images, wires up the podcast media types, etc.
require 'date'
require 'net/http'
require 'uri'
require 'open-uri'
require 'rubypress'
require 'mime'
require 'mime/types'
require 'imgkit'
# goals
#
# migrate all podcast types from jimhightower.com/node/8517 and greater to dev.hightowerlowdown.org
# done:
# in wordpress we want these to show up as podcasts with the correct original date, title and author (jim hightower)
# we want the podcast actual audio attached
# not done:
# the media itself needs to have summaries and a title
# the whole thing needs an image
#
# example usage
#
# ruby migrate.rb 8517
#################################################################################
# Log into Wordpress
#################################################################################
# XML-RPC client for the destination WordPress site.
# NOTE(review): the host/username/password values look like placeholders;
# confirm and substitute real ones before running.
wp = Rubypress::Client.new(
  host: "what",
  username: "why",
  password: "wha"
)
#################################################################################
# Fetch content from old site with parsing
# and write to new site
#################################################################################
# Returns the absolute offset of +needle+ inside +html+, searching from
# offset +from+. Aborts the script with a readable message when the marker
# is missing, instead of letting a later nil arithmetic raise NoMethodError.
def hightower_offset(html, needle, from = 0)
  html.index(needle, from) ||
    abort("Could not find '#{needle}' in the page; post seems to be empty")
end

# Scrapes one node page from jimhightower.com and republishes it through the
# given WordPress XML-RPC client as a "podcast" post with its audio file and
# a generated title image attached.
#
# Side effects: performs HTTP requests, writes the audio file and a
# "<audio file>.jpg" image into the current directory, prints progress to
# stdout, and calls Kernel#abort when the page cannot be parsed or an
# upload does not yield an id.
#
# @param wp      client object responding to newPost, uploadFile and editPost
# @param urlcode [String] node id appended to http://jimhightower.com/node/
# @return [nil]
def FetchHighTowerPage(wp, urlcode)
  code = urlcode.to_s.strip
  abort("No node id given; nothing to fetch") if code.empty?

  # Get old site data
  uri = URI.parse("http://jimhightower.com/node/#{code}")
  response = Net::HTTP.get_response(uri)
  unless response.is_a?(Net::HTTPSuccess)
    abort("Could not fetch #{uri}: HTTP #{response.code}")
  end
  html = response.body.to_s

  # Parse. All offsets below are fixed distances from textual markers in the
  # page markup; they are kept exactly as the original script had them.
  audio = hightower_offset(html, 'commentary-audio') + 18 + 9 + 23
  audif = hightower_offset(html, '>', audio) - 2
  soundtrackurl = html[audio..audif].to_s
  filenameindex = hightower_offset(soundtrackurl, 'files') + 6
  filename = soundtrackurl[filenameindex..-1].to_s
  # A stray trailing comma here used to turn mimetype into an Array.
  mimetype = MIME::Types.type_for(filename).first.to_s

  head = hightower_offset(html, 'div', audio) + 4
  tail = hightower_offset(html, 'script', head) - 4
  body = html[head..tail].to_s

  title1 = hightower_offset(html, 'title') + 22
  title2 = hightower_offset(html, '/title') - 2
  title = html[title1..title2].to_s

  time1 = hightower_offset(html, '"info">') + 11
  time2 = hightower_offset(html, 'em', time1) - 3
  timestr = html[time1..time2].to_s
  begin
    # One day is added to the parsed date; the original note says
    # "drupal is utc".
    time = Date.parse(timestr) + 1
  rescue ArgumentError
    abort("Could not parse a date from '#{timestr}'")
  end

  # print a few details
  puts soundtrackurl
  puts title
  puts time
  puts body
  puts mimetype

  # sanity tests
  if filename.empty? || [soundtrackurl, title, body].any? { |field| field.length < 4 }
    abort("Things are going poorly for us; post seems to be empty")
  end

  # summary: text of the first <p> that contains no nested markup
  summary = body[/<p>([^<>]*)<\/p/, 1].to_s.strip
  abort("bad summary generation") if summary.length < 5
  puts summary

  #
  # post the article to the new server
  # doing this first for now and will patch up relationships after
  #
  postid = wp.newPost(
    :blog_id => "0",
    :content => {
      :post_status => "publish",
      :post_type => "podcast",
      :post_date => time,
      :post_content => body,
      :post_title => title,
      :post_name => title,
    }
  )
  abort("Such sad cannot post") if postid.to_s.empty?
  puts "Published post with id #{postid}"

  #
  # Fetch sound from old server and store locally for a second
  # e.g. http://jimhightower.com/sites/jimhightower.civicactions.net/files/33-23_f_show.mp3
  #
  puts "Loading sound from #{soundtrackurl}"
  puts "Saving sound to #{filename}"
  Net::HTTP.start("jimhightower.com") do |http|
    resp = http.get(soundtrackurl)
    unless resp.is_a?(Net::HTTPSuccess)
      abort("Could not download #{soundtrackurl}: HTTP #{resp.code}")
    end
    # File.open rather than Kernel#open: the name comes from remote HTML.
    File.open(filename, "wb") { |file| file.write(resp.body) }
  end
  unless File.size?(filename)
    puts "There appears to be no audio - this is sad making"
    abort("We canna handle this anymore captain")
  end

  #
  # post the sound to the new server
  # the post_id is set here to associate the sound with the post - but it is
  # not clear if this is the right thing to do
  #
  soundresults = wp.uploadFile(:data => {
    :name => filename,
    :type => mimetype,
    # :overwrite => 1, #broken
    :post_id => postid,
    :bits => XMLRPC::Base64.new(File.binread(filename))
  })
  puts "saved sound to server id:#{soundresults['id']} file:#{soundresults['file']} url:#{soundresults['url']} type:#{soundresults['type']}"

  soundid = soundresults['id']
  soundurl = soundresults['url']
  soundduration = "2:00" # hard-coded; the real duration is not measured
  soundfilesizeraw = File.size(filename).to_f
  soundfilesize = format('%.2fM', soundfilesizeraw / 2**20)

  #
  # make a pretty backdrop from scratch by reaching into an html painting engine
  #
  imagefilename = "#{filename}.jpg"
  makeme = "<h1>#{title}</h1>#{timestr}"
  kit = IMGKit.new(makeme, :quality => 80, :width => 600, :height => 400, "crop-w" => 600, "crop-h" => 400, "disable-smart-width" => true, "zoom" => 1.0)
  kit.to_file(imagefilename)
  puts "made a background image file at #{imagefilename}"

  #
  # upload an image
  #
  imageresults = wp.uploadFile(:data => {
    :name => imagefilename,
    :type => 'image/jpeg',
    # :overwrite => 1, #broken
    :post_id => postid,
    :bits => XMLRPC::Base64.new(File.binread(imagefilename)),
  })
  imageid = imageresults && imageresults['id']
  abort("could not upload image") if imageid.to_s.empty? || imageid == 0

  # An alternative way of attaching the thumbnail (editing the attachment
  # itself and setting :post_parent) was sketched here but marked unused; see
  # http://stackoverflow.com/questions/17722743/attach-image-to-post-in-wordpress-xmlrpc

  #
  # revise the post to set podcast fields and the thumbnail
  #
  wp.editPost(
    :blog_id => 0,
    :post_id => postid,
    :content => {
      :post_thumbnail => imageid,
      :custom_fields => [
        { 'key' => 'enclosure', 'value' => soundurl },
        { 'key' => 'audio_file', 'value' => soundurl },
        { 'key' => 'duration', 'value' => soundduration },
        { 'key' => 'filesize', 'value' => soundfilesize },
        { 'key' => 'filesize_raw', 'value' => soundfilesizeraw },
        { 'key' => 'date_recorded', 'value' => time.strftime("%d-%m-%Y") },
      ],
    }
  )
  puts "updated post"

  # The sound and the image attachments receive the same descriptive fields.
  media_content = {
    :post_title => title,
    :post_caption => title,
    :post_excerpt => summary,
    :post_description => summary,
    :post_content => summary,
  }

  #
  # revise the media blob
  #
  if soundid
    wp.editPost(:blog_id => 0, :post_id => soundid, :content => media_content.dup)
    puts "updated the sound description"
  end

  #
  # Revise the image
  #
  wp.editPost(:blog_id => 0, :post_id => imageid, :content => media_content.dup)
  puts "updated the image description"
end
#################################################################################
# GO!
#################################################################################
# Entry point: the Drupal node id is the first command-line argument
# (e.g. `ruby migrate.rb 8517`). Exit with a usage hint instead of failing
# later on a nil node id.
abort("usage: ruby migrate.rb <drupal node id>") if ARGV[0].to_s.strip.empty?
FetchHighTowerPage(wp, ARGV[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment