ashfurrow · October 21, 2014 20:06
diff --git a/Readme.md b/Readme.md
diff --git a/script.rb b/script.rb
 #!/usr/bin/ruby

 require 'Nokogiri'
 require 'net/http'
 require 'securerandom'
 require 'FileUtils'
 require 'date'

 # One blog post read from an <item> element of the XML export (presumably a
 # Squarespace export in WordPress format, judging by the CDN host and the
 # wp: element names used elsewhere in this script -- confirm against the
 # export you feed in).
 class Post
   attr_accessor :title
   attr_accessor :raw_content
   attr_accessor :pub_date
   attr_accessor :link

   # node - the <item> element. It only needs to answer xpath(query) with an
   #        object that answers text.
   def initialize(node)
     @title = node.xpath("title").text
     @raw_content = node.xpath("content:encoded").text
     @pub_date = node.xpath("pubDate").text
     @link = node.xpath("link").text
   end

   # Rewrites the post's HTML (local images, responsive iframes, Instagram
   # cleanup) and writes it to blog/<yyyy-mm-dd>-<slug>.markdown, relative to
   # the current working directory. Prints progress to stdout.
   #
   # Raises whatever Date parsing, Net::HTTP or the file system raise; nothing
   # is rescued here.
   def generate_markdown
     content_node = Nokogiri::HTML("<import>#{ @raw_content }</import>")

     localize_images(content_node)
     make_iframes_responsive(content_node)

     # Remove Instagram ickiness
     content_node.xpath("//div[contains(@class,\"instagram-oembed\")]/p").each { |node| node.remove }

     # Finally, generate the markdown file from the content_node.
     # DateTime (not Date) so the %H:%M written into the front matter is the
     # real publication time instead of always 00:00.
     published = DateTime.parse(@pub_date)

     directory = "blog"
     FileUtils.mkdir_p directory
     filename = "#{ directory }/#{ published.strftime('%Y-%m-%d') }-#{ slug }.markdown"

     IO.write(filename, markdown_body(published, content_node.xpath("//import").first))

     puts "Wrote #{ filename }"
   end

   def to_s
     "#{ @title } published on #{ pub_date }"
   end

   private

   # Downloads every <img> hosted on the Squarespace CDN into
   # img/import<link>/ and repoints the tag at the local copy.
   def localize_images(content_node)
     images = content_node.xpath("//img").select do |image|
       src = image["src"]
       src && src.include?("squarespace.com")
     end

     images.each do |image|
       response = Net::HTTP.get_response(URI.parse(image["src"]))

       # Do not save an error page or redirect stub as if it were an image;
       # keep the remote src so the post still renders.
       unless response.is_a?(Net::HTTPSuccess)
         puts "Warning – download failed (HTTP #{ response.code }), keeping remote image: #{ image["src"] }"
         next
       end

       extension = image_extension(response["Content-Type"], image["src"])

       directory = "img/import#{ link }/"
       filename = "#{ directory }#{ SecureRandom.uuid.gsub('-','').upcase }.#{ extension }"

       FileUtils.mkdir_p directory
       # Binary write: image bytes must not go through newline/encoding
       # translation.
       File.binwrite(filename, response.body)

       puts "Wrote #{ filename } to disk"

       # Change the content_node's img children to point to their new files
       image["src"] = "/#{ filename }"
       image["class"] = "img-responsive"
     end
   end

   # Maps a Content-Type header value to a file extension. A missing header
   # (nil) or an unrecognised type falls back to "jpeg" with a warning.
   def image_extension(content_type, source)
     type = content_type.to_s.downcase

     # Explicit parentheses: without them `||` binds inside the argument list
     # of the first include? call.
     if type.include?("image/jpg") || type.include?("image/jpeg")
       "jpg"
     elsif type.include?("image/gif")
       "gif"
     elsif type.include?("image/png")
       "png"
     else
       puts "Warning – unknown file (defaulting to jpeg): #{ source }"
       "jpeg"
     end
   end

   # Wraps each iframe in a 16:9 embed-responsive container.
   def make_iframes_responsive(content_node)
     iframes = content_node.xpath("//iframe")
     iframes.wrap("<div class='embed-responsive embed-responsive-16by9'></div>")
     iframes.each { |iframe| iframe["class"] = "embed-responsive-item" }
   end

   # File-name-safe form of the title: spaces become dashes, quotes and
   # ? / : are dropped, everything is lower-cased.
   def slug
     @title.gsub(' ', '-').gsub(/['"?\/:]/, '').downcase
   end

   # Front matter plus the rewritten HTML. Built from an array so the
   # front-matter lines start in column 0 whatever the indentation of this
   # source file is.
   def markdown_body(published, content)
     # Escape \ and " so the double-quoted title stays a valid scalar.
     escaped_title = @title.gsub(/["\\]/) { |char| "\\#{ char }" }

     [
       "---",
       "title: \"#{ escaped_title }\"",
       "date: #{ published.strftime('%Y-%m-%d %H:%M') }",
       "---",
       "",
       content.to_s,
       "",
       "<!-- more -->",
       "",
       ""
     ].join("\n")
   end
 end

 # Entry point: reads the XML export named on the command line and writes one
 # markdown file per published post.
 filename = ARGV.first
 abort "Usage: ./script path_of_xml_file" unless filename
 # File.file? also rejects directories, which File.open could not read anyway.
 abort "File does not exist" unless File.file?(filename)

 # Block form so the handle is closed even if parsing raises.
 doc = File.open(filename) { |file| Nokogiri::XML(file) }

 puts "Opened XML file at #{ filename }"

 # Keep only items that are posts (not pages/attachments) and are published.
 post_nodes = doc.xpath("//item").select do |item|
   item.xpath("wp:post_type/text()").text == "post" &&
     item.xpath("wp:status/text()").text == "publish"
 end

 posts = post_nodes.map { |node| Post.new(node) }

 # puts posts[0].generate_markdown
 posts.each { |post| post.generate_markdown }
	#!/usr/bin/ruby

	require 'Nokogiri'
	require 'net/http'
	require 'securerandom'
	require 'FileUtils'
	require 'date'

	# One blog post read from an <item> element of the XML export (presumably a
	# Squarespace export in WordPress format, judging by the CDN host and the
	# wp: element names used elsewhere in this script -- confirm against the
	# export you feed in).
	class Post
	  attr_accessor :title
	  attr_accessor :raw_content
	  attr_accessor :pub_date
	  attr_accessor :link

	  # node - the <item> element. It only needs to answer xpath(query) with an
	  #        object that answers text.
	  def initialize(node)
	    @title = node.xpath("title").text
	    @raw_content = node.xpath("content:encoded").text
	    @pub_date = node.xpath("pubDate").text
	    @link = node.xpath("link").text
	  end

	  # Rewrites the post's HTML (local images, responsive iframes, Instagram
	  # cleanup) and writes it to blog/<yyyy-mm-dd>-<slug>.markdown, relative to
	  # the current working directory. Prints progress to stdout.
	  #
	  # Raises whatever Date parsing, Net::HTTP or the file system raise; nothing
	  # is rescued here.
	  def generate_markdown
	    content_node = Nokogiri::HTML("<import>#{ @raw_content }</import>")

	    localize_images(content_node)
	    make_iframes_responsive(content_node)

	    # Remove Instagram ickiness
	    content_node.xpath("//div[contains(@class,\"instagram-oembed\")]/p").each { |node| node.remove }

	    # Finally, generate the markdown file from the content_node.
	    # DateTime (not Date) so the %H:%M written into the front matter is the
	    # real publication time instead of always 00:00.
	    published = DateTime.parse(@pub_date)

	    directory = "blog"
	    FileUtils.mkdir_p directory
	    filename = "#{ directory }/#{ published.strftime('%Y-%m-%d') }-#{ slug }.markdown"

	    IO.write(filename, markdown_body(published, content_node.xpath("//import").first))

	    puts "Wrote #{ filename }"
	  end

	  def to_s
	    "#{ @title } published on #{ pub_date }"
	  end

	  private

	  # Downloads every <img> hosted on the Squarespace CDN into
	  # img/import<link>/ and repoints the tag at the local copy.
	  def localize_images(content_node)
	    images = content_node.xpath("//img").select do |image|
	      src = image["src"]
	      src && src.include?("squarespace.com")
	    end

	    images.each do |image|
	      response = Net::HTTP.get_response(URI.parse(image["src"]))

	      # Do not save an error page or redirect stub as if it were an image;
	      # keep the remote src so the post still renders.
	      unless response.is_a?(Net::HTTPSuccess)
	        puts "Warning – download failed (HTTP #{ response.code }), keeping remote image: #{ image["src"] }"
	        next
	      end

	      extension = image_extension(response["Content-Type"], image["src"])

	      directory = "img/import#{ link }/"
	      filename = "#{ directory }#{ SecureRandom.uuid.gsub('-','').upcase }.#{ extension }"

	      FileUtils.mkdir_p directory
	      # Binary write: image bytes must not go through newline/encoding
	      # translation.
	      File.binwrite(filename, response.body)

	      puts "Wrote #{ filename } to disk"

	      # Change the content_node's img children to point to their new files
	      image["src"] = "/#{ filename }"
	      image["class"] = "img-responsive"
	    end
	  end

	  # Maps a Content-Type header value to a file extension. A missing header
	  # (nil) or an unrecognised type falls back to "jpeg" with a warning.
	  def image_extension(content_type, source)
	    type = content_type.to_s.downcase

	    # Explicit parentheses: without them `||` binds inside the argument list
	    # of the first include? call.
	    if type.include?("image/jpg") || type.include?("image/jpeg")
	      "jpg"
	    elsif type.include?("image/gif")
	      "gif"
	    elsif type.include?("image/png")
	      "png"
	    else
	      puts "Warning – unknown file (defaulting to jpeg): #{ source }"
	      "jpeg"
	    end
	  end

	  # Wraps each iframe in a 16:9 embed-responsive container.
	  def make_iframes_responsive(content_node)
	    iframes = content_node.xpath("//iframe")
	    iframes.wrap("<div class='embed-responsive embed-responsive-16by9'></div>")
	    iframes.each { |iframe| iframe["class"] = "embed-responsive-item" }
	  end

	  # File-name-safe form of the title: spaces become dashes, quotes and
	  # ? / : are dropped, everything is lower-cased.
	  def slug
	    @title.gsub(' ', '-').gsub(/['"?\/:]/, '').downcase
	  end

	  # Front matter plus the rewritten HTML. Built from an array so the
	  # front-matter lines start in column 0 whatever the indentation of this
	  # source file is.
	  def markdown_body(published, content)
	    # Escape \ and " so the double-quoted title stays a valid scalar.
	    escaped_title = @title.gsub(/["\\]/) { |char| "\\#{ char }" }

	    [
	      "---",
	      "title: \"#{ escaped_title }\"",
	      "date: #{ published.strftime('%Y-%m-%d %H:%M') }",
	      "---",
	      "",
	      content.to_s,
	      "",
	      "<!-- more -->",
	      "",
	      ""
	    ].join("\n")
	  end
	end

	# Entry point: reads the XML export named on the command line and writes one
	# markdown file per published post.
	filename = ARGV.first
	abort "Usage: ./script path_of_xml_file" unless filename
	# File.file? also rejects directories, which File.open could not read anyway.
	abort "File does not exist" unless File.file?(filename)

	# Block form so the handle is closed even if parsing raises.
	doc = File.open(filename) { |file| Nokogiri::XML(file) }

	puts "Opened XML file at #{ filename }"

	# Keep only items that are posts (not pages/attachments) and are published.
	post_nodes = doc.xpath("//item").select do |item|
	  item.xpath("wp:post_type/text()").text == "post" &&
	    item.xpath("wp:status/text()").text == "publish"
	end

	posts = post_nodes.map { |node| Post.new(node) }

	# puts posts[0].generate_markdown
	posts.each { |post| post.generate_markdown }
No results found