Created
August 2, 2010 15:03
-
-
Save ledermann/504775 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import a posterous.com blog into nanoc (http://nanoc.stoneship.org).
# Uses Nokogiri to parse the XML data returned by the posterous API.
require 'rubygems' | |
require 'nanoc3' | |
require 'nokogiri' | |
require 'open-uri' | |
module Nanoc3::Extra
  module Importers
    # Imports the posts of a posterous.com blog into the nanoc site found in
    # the current working directory.
    #
    # Posts are read page by page from the posterous "readposts" API. Each
    # post becomes a text item; every link in the post body whose href
    # contains "getfile" is downloaded and stored as an additional item next
    # to the post.
    class Posterous
      # @param hostname [String] value sent as the "hostname" query parameter
      # @param user [String] user name for HTTP basic authentication
      # @param password [String] password for HTTP basic authentication
      def initialize(hostname, user, password)
        @hostname = hostname
        @user = user
        @password = password
        @site = Nanoc3::Site.new('.')
      end

      # Fetches result pages until one contains no posts, importing every
      # post found on the way, and prints progress to stdout.
      #
      # @return [void]
      def run
        page = 1
        sum = 0
        loop do
          puts "Processing page #{page}..."
          xml = fetch("http://posterous.com/api/readposts?hostname=#{@hostname}&page=#{page}",
                      :http_basic_authentication => [@user, @password])
          count = process(Nokogiri::XML(xml))
          break if count.zero?
          page += 1
          sum += count
        end
        puts "Finished. Imported #{sum} articles."
      end

      # Imports every //rsp/post element of one API response document.
      #
      # @param xml [Nokogiri::XML::Document] parsed API response
      # @return [Integer] number of posts found in the document
      def process(xml)
        posts = xml.xpath('//rsp/post')
        posts.each { |post| import_post(post) }
        posts.size
      end

      private

      # Creates the text item for a single post plus one item per linked
      # media file.
      def import_post(post)
        title = post.xpath('title').text
        author = post.xpath('author').text
        post_date = Date.parse(post.xpath('date').text)
        body = Nokogiri::HTML(post.xpath('body').text)

        puts "- #{title}"

        # Both the text item and its media items live under this prefix.
        base = article_path(post_date, title)

        data_source.create_item(clean_content(body.text),
                                { :title => title, :author => author },
                                "#{base}/index/",
                                :extension => '.txt')

        media_urls(body).each do |url|
          puts "Download #{url}"
          data_source.create_item(fetch(url), {}, "#{base}/#{media_name(url)}/",
                                  :extension => File.extname(url))
        end
      end

      # The data source that receives all created items (the first one the
      # site exposes).
      def data_source
        @site.data_sources.first
      end

      # Downloads +url+ and returns the response body as a String.
      #
      # The URL is parsed first and read through open-uri's URI#read instead
      # of Kernel#open: Kernel#open executes strings that start with "|" as
      # shell commands (media URLs come straight from post HTML), and it no
      # longer opens URLs on Ruby 3. URI#read also closes the underlying
      # handle, which the bare open call never did.
      #
      # @param url [String] absolute URL to download
      # @param options [Hash] open-uri options, e.g. :http_basic_authentication
      # @raise [ArgumentError] if the URL scheme cannot be downloaded
      def fetch(url, options = {})
        uri = URI.parse(url)
        raise ArgumentError, "Not a downloadable URL: #{url}" unless uri.respond_to?(:read)
        uri.read(options)
      end

      # Hrefs of all links in the post body that contain "getfile".
      def media_urls(body)
        body.xpath("//a[contains(@href, 'getfile')]").map { |link| link['href'] }
      end

      # Identifier prefix shared by a post and its media:
      # /articles/YYYY/MM/DD-slug (no trailing slash).
      def article_path(date, title)
        format('/articles/%d/%02d/%02d-%s', date.year, date.month, date.day, slug(title))
      end

      # Turns a title into an identifier-safe name: lower-cased, every
      # character outside a-z, 0-9, "-" and "_" replaced by a dash, runs of
      # dashes collapsed, and leading/trailing dashes removed. Digits are
      # kept so that titles differing only by a number stay distinct.
      def slug(title)
        title.downcase.
          gsub(/[^a-z0-9\-_]/, '-').
          gsub(/-+/, '-').
          gsub(/\A-|-\z/, '')
      end

      # Removes the gallery/video captions and "(123 KB)" size markers from
      # the plain-text body and strips surrounding whitespace.
      def clean_content(text)
        text.gsub('See the full gallery on posterous', '').
          gsub('Watch on posterous', '').
          gsub(/\(\d+ KB\)/, '').
          strip
      end

      # Item name for a media URL: its last path segment without the
      # ".scaled….jpg" suffix and without ".jpg".
      def media_name(url)
        url.split('/').last.gsub(/\.scaled.+\.jpg/i, '').gsub(/\.jpg/i, '')
      end
    end
  end
end
# Run the import as soon as this file is executed. The arguments are the blog
# hostname, the user name and the password, in that order.
# NOTE(review): the values below look like placeholders — confirm and replace
# them before running.
posterous = Nanoc3::Extra::Importers::Posterous.new('my_hostname', 'steve', 'secret')
posterous.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment