Created
February 12, 2015 22:54
-
-
Save nathanpalmer/0b9f670201a623c272a3 to your computer and use it in GitHub Desktop.
Convert image sources to be usable even through redirects and https
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'imgkit' | |
| require 'dimensions' | |
| require 'nokogiri' | |
| require 'net/http' | |
| require 'uri' | |
# Running image counter: incremented once per <img> element by the loop further
# down and handed to get_url, which prefixes it to the saved file name.
count = 0
# Downloads the image behind +original_url+ (following HTTP redirects, over
# plain HTTP or HTTPS) into the local "images" directory and returns a file://
# URL pointing at the saved copy.
#
# The function is best-effort: when the source cannot be fetched (blank or
# malformed URL, non-HTTP scheme such as data:, a non-200/non-redirect
# response, or a redirect without a Location header) the untouched
# +original_url+ is returned so the caller can leave the <img> as it was.
#
# @param original_url [String, nil] the image source as found in the document
# @param count [Integer] number prefixed to the saved file name
# @param redirect_url [String, nil] URL to fetch instead of +original_url+
#   (used internally while following redirects)
# @param limit [Integer] how many more requests may be made in this chain
# @return [String, nil] file:// URL of the downloaded copy, or +original_url+
# @raise [ArgumentError] when the redirect chain exceeds +limit+
def get_url(original_url, count, redirect_url = nil, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  target = (redirect_url || original_url).to_s.strip
  return original_url if target.empty?

  url = URI.parse(target)
  # Only http(s) sources can be fetched; data:, protocol-relative, etc. pass through.
  return original_url unless url.is_a?(URI::HTTP) && !url.host.to_s.empty?

  # request_uri keeps the query string and is "/" for an empty path, where
  # url.path would drop the query and make Net::HTTP reject an empty path.
  request = Net::HTTP::Get.new(url.request_uri, 'User-Agent' => 'Mozilla/5.0')
  response = Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    http.request(request)
  end

  case response
  when Net::HTTPOK
    # Name the file after the URL path only, restricted to characters that
    # survive unchanged in a file:// URL (no "?", "%", spaces, ...), so the
    # returned URL really resolves to the file written below.
    basename = File.basename(url.path).gsub(/[^A-Za-z0-9._-]/, '_')
    basename = 'image' if basename.empty?
    filename = "images/#{count}-#{basename}"

    puts "Loading #{filename}"
    Dir.mkdir('images') unless File.directory?('images')
    File.open(filename, 'wb') { |f| f.write(response.body) }
    # Expand relative to the working directory, which is where the file was written.
    "file://#{File.expand_path(filename)}"
  when Net::HTTPRedirection
    location = response['location']
    return original_url if location.nil? || location.strip.empty?

    # Location may be relative; resolve it against the URL just requested.
    get_url(original_url, count, url.merge(location.strip).to_s, limit - 1)
  else
    original_url
  end
rescue URI::Error
  # Unparsable source or redirect target: leave the image reference as it was.
  original_url
end
# Driver: localise every image referenced by the input page, then render the
# rewritten page to a PNG and also save the rewritten HTML.

# Directory that get_url saves downloaded images into.
# (Dir.exist? replaces Dir.exists?, which was removed in Ruby 3.2.)
Dir.mkdir('images') unless Dir.exist?('images')

doc = Nokogiri::HTML(File.read('trendy_https_image_redirect.html'))

doc.xpath('//img').each do |item|
  src = item['src']
  # An <img> without a usable src has nothing to download; leave it alone.
  next if src.nil? || src.strip.empty?

  count += 1
  item['src'] = get_url(src, count)
end

# Serialise once and reuse for both outputs.
html = doc.to_html
File.open('image.png', 'wb') { |f| f.write(IMGKit.new(html).to_img(:png)) }
File.open('image.html', 'w') { |f| f.write(html) }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment