Skip to content

Instantly share code, notes, and snippets.

@stansidel
Created November 1, 2013 10:07
Show Gist options
  • Save stansidel/7263360 to your computer and use it in GitHub Desktop.
Save stansidel/7263360 to your computer and use it in GitHub Desktop.
Check url redirects to be correct
#!/usr/bin/env ruby
# encoding: utf-8
#
# Checks that each old URL listed in a CSV file redirects to the expected
# new URL.
#
# Usage: <script> [input_csv] [from_row]
#   input_csv - CSV with the old URL in column 1 and the expected redirect
#               target in column 2 (default: data/in.csv).
#   from_row  - 1-based row to resume from. When greater than 0, earlier rows
#               are skipped and the results are appended to the output file
#               without a header row (default: 0, i.e. start afresh).
#
# Results are written to data/out.csv with the columns
# old_url, new_url, result, status, location.

# Switch the Windows console to UTF-8. `chcp` exists only on Windows, and
# shelling out to a missing command raises Errno::ENOENT elsewhere.
`chcp 65001` if RUBY_PLATFORM =~ /mswin|mingw/

require 'rubygems'
require 'httpclient'
require 'csv'

filename = ARGV[0] || 'data/in.csv'
# ARGV entries are Strings; convert so the numeric comparisons below work
# (nil.to_i is 0, which keeps the "start afresh" default).
from_line = ARGV[1].to_i
output_filename = 'data/out.csv'
appending = from_line > 0

header = {
  old_url: "old_url",
  new_url: "new_url",
  result: "result",
  status: "status",
  location: "location"
}

results = []
# A fresh file starts with a header row; an appended run must not repeat it.
results << header unless appending

httpc = HTTPClient.new
# Explicit row counter: the global `$.` tracks the reader's underlying IO
# calls rather than the row currently being processed.
row_number = 0

begin
  CSV.foreach(filename, col_sep: ",") do |row|
    row_number += 1
    next if row_number < from_line

    puts row_number
    # to_s guards against short or blank rows, where a cell is nil.
    old_url = row[0].to_s.strip.downcase
    new_url = row[1].to_s.strip.downcase
    next if old_url.empty? || new_url.empty?

    # Key order defines the column order of the output row.
    result = {
      old_url: old_url,
      new_url: new_url,
      result: '',
      status: '',
      location: ''
    }

    begin
      response = httpc.head(old_url)
    rescue StandardError => e
      # A failing request is recorded for this URL only; the error text goes
      # into the location column. Interrupts are deliberately not caught here.
      result[:result] = 'error'
      result[:location] = e.to_s
      puts old_url
      puts e
      results << result
      next
    end

    result[:status] = response.status
    if %w(301 302).include? response.status.to_s
      # A redirect without a Location header yields '' instead of raising.
      location = response.header['Location'].first.to_s.strip.downcase
      result[:location] = location
      result[:result] = location == new_url ? 'ok' : 'error'
    else
      result[:result] = 'error'
    end

    puts result unless result[:result] == 'ok'
    results << result
  end
rescue StandardError, Interrupt => e
  # Best effort: on a failure or Ctrl-C, still write what was collected so the
  # run can be resumed with from_row.
  puts "Exception: #{e}"
end

puts "Writing to file..."
CSV.open(output_filename, appending ? "ab" : "wb") do |csv|
  results.each { |result_row| csv << result_row.values }
end
puts "Finished"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment