Created
November 18, 2015 05:26
-
-
Save anselm/4330c9bb0db6a9b079f6 to your computer and use it in GitHub Desktop.
Converts Jim Hightower's site from Drupal to WordPress without having to log into the Drupal site. Generates images, wires up podcast media types, etc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'date' | |
require 'net/http' | |
require 'uri' | |
require 'open-uri' | |
require 'rubypress' | |
require 'mime' | |
require 'mime/types' | |
require 'imgkit' | |
# Goals
#
# Migrate every podcast node from jimhightower.com/node/8517 onward to dev.hightowerlowdown.org.
# Done:
#   - In WordPress these should show up as podcasts with the correct original date, title and author (Jim Hightower).
#   - The actual podcast audio should be attached.
# Not done:
#   - The media item itself needs a summary and a title.
#   - The whole thing needs an image.
#
# Example usage
#
#   ruby migrate.rb 8517
################################################################################# | |
# Log into Wordpress | |
################################################################################# | |
# Build the WordPress XML-RPC client used for every call below.
# Connection settings are taken from the environment when present so that real
# credentials never have to be written into this file; the scrubbed placeholder
# values remain the defaults, so behaviour is unchanged when nothing is set.
wp = Rubypress::Client.new(:host => ENV.fetch("WP_HOST", "what"),
                           :username => ENV.fetch("WP_USERNAME", "why"),
                           :password => ENV.fetch("WP_PASSWORD", "wha"))
################################################################################# | |
# Fetch content from old site with parsing | |
# and write to new site | |
################################################################################# | |
# Returns the absolute offset of +marker+ in +text+, searching from offset
# +from+. Aborts with a readable message when the marker is absent, instead of
# letting arithmetic on nil raise a NoMethodError further down.
def hightower_locate(text, marker, from = 0)
  position = text.index(marker, from)
  abort("Could not find #{marker.inspect} while parsing the old page; has the markup changed?") unless position
  position
end

# Scrapes one old-site node page using fixed offsets around known markers.
#
# html - the page source as a String.
#
# Returns a Hash with the keys :soundtrackurl, :filename, :mimetype, :body,
# :title, :timestr, :time (a Date) and :summary.
# Aborts when a marker is missing, the date cannot be parsed, or no usable
# summary paragraph is found.
def hightower_parse_page(html)
  # The link target starts a fixed distance after the marker. NOTE(review): the
  # offsets presumably skip `commentary-audio">` (18), `<a href="` (9) and
  # `http://jimhightower.com` (23), leaving a site-relative path - confirm
  # against a live page.
  audio_start = hightower_locate(html, 'commentary-audio') + 18 + 9 + 23
  audio_end = hightower_locate(html, '>', audio_start) - 2
  soundtrackurl = html[audio_start..audio_end]

  # Everything after "files/" is the file name. It comes from remote HTML, so
  # reduce it to a bare base name before it is ever used as a local path.
  filename_start = hightower_locate(soundtrackurl, 'files') + 6
  filename = File.basename(soundtrackurl[filename_start..-1].to_s)
  abort("Could not work out a file name from #{soundtrackurl.inspect}") if filename.empty?

  # Must be a plain String: it is sent as the upload's :type.
  mimetype = MIME::Types.type_for(filename).first.to_s

  # Article body: from just past the first "div>" after the audio link up to
  # four characters before the next "script".
  body_start = hightower_locate(html, 'div', audio_start) + 4
  body_end = hightower_locate(html, 'script', body_start) - 4
  body = html[body_start..body_end]

  # Title: skips 22 characters from the first "title" (the tag remainder plus,
  # presumably, a fixed site prefix) and stops just before "</title".
  title_start = hightower_locate(html, 'title') + 22
  title_end = hightower_locate(html, '/title') - 2
  title = html[title_start..title_end]

  # Date text: starts 11 characters after `"info">` and ends three characters
  # before the next "em".
  date_start = hightower_locate(html, '"info">') + 11
  date_end = hightower_locate(html, 'em', date_start) - 3
  timestr = html[date_start..date_end]
  begin
    # NOTE(review): this adds one calendar day; the original comment only says
    # "drupal is utc" - confirm that a whole-day shift is what is wanted.
    time = Date.parse(timestr) + 1
  rescue ArgumentError, TypeError
    abort("Could not parse a date from #{timestr.inspect}")
  end

  # Summary: text of the first <p> that contains no nested markup.
  first_paragraph = body.to_s.scan(/<p>([^<>]*)<\/p/).first
  summary = first_paragraph ? first_paragraph.first.strip : ""
  abort("bad summary generation") if summary.length < 5

  {
    :soundtrackurl => soundtrackurl,
    :filename => filename,
    :mimetype => mimetype,
    :body => body,
    :title => title,
    :timestr => timestr,
    :time => time,
    :summary => summary,
  }
end

# Sets the title, caption, excerpt, description and content of an uploaded
# media item (sound or image) through wp.editPost.
def hightower_describe_attachment(wp, attachment_id, title, summary)
  wp.editPost(
    :blog_id => 0,
    :post_id => attachment_id,
    :content => {
      :post_title => title,
      :post_caption => title,
      :post_excerpt => summary,
      :post_description => summary,
      :post_content => summary,
    }
  )
end

# Migrates one podcast node from the old site to WordPress.
#
# Fetches http://jimhightower.com/node/<urlcode>, scrapes title, date, body,
# summary and audio link, publishes a "podcast" post, downloads the audio into
# the current directory and uploads it, renders and uploads a title image, and
# finally sets the post's podcast custom fields and thumbnail and describes
# both media items.
#
# wp      - client responding to newPost, uploadFile and editPost.
# urlcode - the node id as a String (e.g. "8517").
#
# Returns nil. Aborts (SystemExit) with a message when the argument is
# missing, the page cannot be parsed, or a step yields nothing usable.
# The downloaded audio and generated image are left on disk.
def FetchHighTowerPage(wp,urlcode)
  abort("usage: ruby migrate.rb <node id>") if urlcode.to_s.strip.empty?

  # Get old site data
  uri = URI.parse("http://jimhightower.com/node/#{urlcode}")
  response = Net::HTTP.get_response(uri)

  # parse
  page = hightower_parse_page(response.body.to_s)
  soundtrackurl = page[:soundtrackurl]
  filename = page[:filename]
  mimetype = page[:mimetype]
  body = page[:body]
  title = page[:title]
  timestr = page[:timestr]
  time = page[:time]
  summary = page[:summary]

  # print a few details
  puts soundtrackurl
  puts title
  puts time
  puts body
  puts mimetype
  puts summary

  # sanity tests
  if [soundtrackurl, title, body].any? { |text| text.nil? || text.length < 4 }
    abort("Things are going poorly for us; post seems to be empty")
  end

  #
  # post the article to the new server
  # doing this first for now and will patch up relationships after
  #
  postid = wp.newPost(
    :blog_id => "0",
    :content => {
      :post_status => "publish",
      :post_type => "podcast",
      :post_date => time,
      :post_content => body,
      :post_title => title,
      :post_name => title,
    }
  )
  abort("Such sad cannot post") if postid.to_s.empty?
  puts "Published post with id #{postid}"

  #
  # Fetch sound from old server and store locally for a second
  # e.g. http://jimhightower.com/sites/jimhightower.civicactions.net/files/33-23_f_show.mp3
  #
  puts "Loading sound from #{soundtrackurl}"
  puts "Saving sound to #{filename}"
  Net::HTTP.start("jimhightower.com") do |http|
    resp = http.get(soundtrackurl)
    # File.open, not Kernel#open: the name comes from remote HTML and
    # Kernel#open would treat a leading "|" as a command to run.
    File.open(filename, "wb") { |file| file.write(resp.body) }
  end
  unless File.size?(filename)
    puts "There appears to be no audio - this is sad making"
    abort("We canna handle this anymore captain")
  end

  #
  # post the sound to the new server
  # the post_id is set here to associate the sound with the post - but it is not clear if this is the right thing to do
  #
  soundresults = wp.uploadFile(:data => {
    :name => filename,
    :type => mimetype,
    # :overwrite => 1, #broken
    :post_id => postid,
    :bits => XMLRPC::Base64.new(File.binread(filename))
  })
  puts "saved sound to server id:#{soundresults['id']} file:#{soundresults['file']} url:#{soundresults['url']} type:#{soundresults['type']}"

  soundid = soundresults['id']
  soundduration = "2:00" # fixed value; the real duration is not measured
  soundfilesizeraw = File.size(filename).to_f
  soundfilesize = "#{'%.2f' % (soundfilesizeraw / 2**20)}M"

  #
  # make a pretty backdrop from scratch by reaching into an html painting engine
  #
  imagefilename = "#{filename}.jpg"
  makeme = "<h1>#{title}</h1>#{timestr}"
  imageoptions = {
    :quality => 80,
    :width => 600,
    :height => 400,
    "crop-w" => 600,
    "crop-h" => 400,
    "disable-smart-width" => true,
    "zoom" => 1.0,
  }
  IMGKit.new(makeme, imageoptions).to_file(imagefilename)
  puts "made a background image file at #{imagefilename}"

  #
  # upload an image
  #
  imageresults = wp.uploadFile(:data => {
    :name => imagefilename,
    :type => 'image/jpeg',
    # :overwrite => 1, #broken
    :post_id => postid,
    :bits => XMLRPC::Base64.new(File.binread(imagefilename)),
  })
  imageid = imageresults['id']
  abort("could not upload image") if !imageid || imageid == 0

  #
  # revise the post to set podcast fields and the thumbnail
  #
  wp.editPost(
    :blog_id => 0,
    :post_id => postid,
    :content => {
      :post_thumbnail => imageid,
      :custom_fields => [
        { 'key' => 'enclosure', 'value' => soundresults['url'] },
        { 'key' => 'audio_file', 'value' => soundresults['url'] },
        { 'key' => 'duration', 'value' => soundduration },
        { 'key' => 'filesize', 'value' => soundfilesize },
        { 'key' => 'filesize_raw', 'value' => soundfilesizeraw },
        { 'key' => 'date_recorded', 'value' => time.strftime("%d-%m-%Y") },
      ],
    }
  )
  puts "updated post"

  #
  # revise the media blob
  #
  if soundid
    hightower_describe_attachment(wp, soundid, title, summary)
    puts "updated the sound description"
  end

  #
  # Revise the image
  #
  hightower_describe_attachment(wp, imageid, title, summary)
  puts "updated the image description"
end
################################################################################# | |
# GO! | |
################################################################################# | |
# Run the migration for the node id passed as the first command-line argument.
node_id = ARGV.first
FetchHighTowerPage(wp, node_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment