Tumblr blog post extraction code. Pretty much tailored for my needs when I migrated from Tumblr to Middleman, but this might still be of use to someone…
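The gems it needs (inferred from the requires in the script; time, yaml, fileutils, and open-uri ship with Ruby) could go in a minimal Gemfile along these lines:

source "https://rubygems.org"

gem "tumblr_client"
gem "nokogiri"
gem "loofah"
gem "reverse_markdown"
gem "kramdown"
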
#!/usr/bin/env ruby

require "tumblr_client"
require "time"
require "yaml"
require "fileutils"
require "open-uri"
require "nokogiri"
require "loofah"
require "reverse_markdown"
require "kramdown"
puts "ReverseMarkdown config" | |
ReverseMarkdown.config do |config| | |
config.unknown_tags = :pass_through | |
config.github_flavored = true | |
end | |
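
# OAuth credentials for the Tumblr API (redacted)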
puts "Tumblr config" | |
Tumblr.configure do |config| | |
config.consumer_key = "..." | |
config.consumer_secret = "..." | |
config.oauth_token = "..." | |
config.oauth_token_secret = "..." | |
end | |
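
# Set up the API client (user_info isn't referenced again below)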
client = Tumblr::Client.new
user_info = client.info

per_page = 20

puts "Fetching posts"
data = client.posts("matiaskorhonen.tumblr.com", offset: 0, limit: per_page)
total_posts = data["total_posts"]
puts "Total posts: #{total_posts}"

pages = (total_posts.to_f / per_page).ceil
posts = data["posts"]

# Fetch the remaining pages (the first request already returned page one)
(pages - 1).times do |page|
  data = client.posts("matiaskorhonen.tumblr.com", offset: (page + 1) * per_page, limit: per_page)
  posts.push(*data["posts"])
end
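
# Turn each post into a Middleman-style Markdown file with YAML frontmatter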
puts "Processing posts" | |
posts_count = posts.size | |
posts.each_with_index do |post, index| | |
puts "=> #{index + 1}/#{posts_count}: #{post["slug"]}" | |
date = Time.parse(post["date"]) | |
basename = "#{date.strftime("%Y-%m-%d")}_#{post["slug"]}" | |
filename = "#{basename}.html.md" | |
body = "" | |
type = post["type"] | |
post_path = post["post_url"].gsub("http://matiaskorhonen.tumblr.com/", "") + "/" | |
short_path = post_path.gsub(post["slug"] + "/", "") | |
aliases = [post_path, short_path] | |
if type == "photo" | |
aliases << short_path.gsub("post", "image") | |
end | |
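
  # Frontmatter fields shared by every post type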
  frontmatter = {
    "tumblr_id" => post["id"],
    "alias" => aliases,
    "date" => date,
    "tags" => post["tags"],
    "type" => type
  }
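
  # Type-specific frontmatter and the raw HTML body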
  case type
  when "text"
    frontmatter["title"] = post["title"]
    body = post["body"]
  when "photo"
    frontmatter["source_url"] = post["source_url"] if post["source_url"]
    frontmatter["source_title"] = post["source_title"] if post["source_title"]

    begin
      puts " -> Fetching image"
      photo_url = post["photos"].first["original_size"]["url"]
      puts "    #{photo_url}"

      extension = File.extname(photo_url.split("/").last)
      photo_name = File.basename(photo_url.split("/").last).split(".").first
      photo_file = "#{photo_name}#{extension}"
      photo_path = "#{basename}/#{photo_file}"

      # unless File.exist? photo_path
      #   photo = URI.open(photo_url)
      #
      #   FileUtils.mkdir_p(basename)
      #   File.open photo_path, "wb+" do |f|
      #     f.write photo.read
      #   end
      # end

      frontmatter["photo"] = photo_file
      body = "#{post["caption"]}\n"
    rescue
      puts post["photos"].inspect
      raise
    end
  when "quote"
    body = post["text"]
    frontmatter["source"] = post["source"]
  when "video"
    body = post["caption"]
    frontmatter["video_url"] = post["permalink_url"]
  when "link"
    frontmatter["title"] = post["title"]
    frontmatter["link_url"] = post["url"]
    body = post["description"]
  else
    raise "Unknown type: #{type}"
  end
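
  # Scrubber that downloads every inline <img> and rewrites its src to a local path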
  image_scrubber = Loofah::Scrubber.new do |node|
    if node.name == "img"
      puts " -> Fetching image"
      photo_url = node["src"]
      puts "    #{photo_url}"

      # URI.open rather than bare open: Kernel#open no longer accepts URLs on Ruby 3+
      photo = URI.open(photo_url)
      extension = File.extname(photo_url.split("/").last)
      photo_name = File.basename(photo_url.split("/").last).split(".").first
      photo_path = "#{basename}/#{photo_name}#{extension}"

      FileUtils.mkdir_p(basename)
      File.open photo_path, "wb+" do |f|
        f.write photo.read
      end

      node["src"] = "articles/#{photo_path}"
    end
  end
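
  # Run the scrubber and convert the cleaned-up HTML to Markdown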
  doc = Loofah.fragment(body).scrub!(image_scrubber)
  body = ReverseMarkdown.convert doc.to_s
  body.gsub!('\_', "_") # un-escape underscores that ReverseMarkdown escaped

  # Derive a title from the first line of the body when the post has none
  if frontmatter["title"].nil?
    title_md = body.split("\n").first
    title_html = Kramdown::Document.new(title_md).to_html
    frontmatter["title"] = Nokogiri::HTML::DocumentFragment.parse(title_html).text
    frontmatter["title"].gsub!("\n", " ")
    frontmatter["title"].strip!
    puts " -> Generated title: “#{frontmatter["title"]}”"
  end
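
  # YAML.dump emits the leading "---", so only the closing delimiter is written by hand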
  File.open filename, "w+" do |f|
    f.write YAML.dump(frontmatter)
    f.write "---\n\n"
    f.write body
    f.write "\n"
  end
end

puts "Done"