Skip to content

Instantly share code, notes, and snippets.

@HugoLnx
Created September 26, 2019 20:13
Show Gist options
  • Save HugoLnx/f208cd561e58ec1339ecd41712f1ac97 to your computer and use it in GitHub Desktop.
Save HugoLnx/f208cd561e58ec1339ecd41712f1ac97 to your computer and use it in GitHub Desktop.
#require 'typhoeus'
require 'rest-client'
require 'open-uri'
# File extensions tried on every candidate path (".../feed" -> ".../feed.xml").
EXTENSIONS = %w[xml rss atom].freeze
# Path fragments appended after each base URL.
SUFFIXES = %w[/ /feed /rss /atom /feed/rss /feed/atom /news /blog /news/feed /blog/feed].freeze
# Path fragments inserted in front of each base URL's path.
PREFIXES = %w[/feed /rss /atom].freeze
# Base URLs used when none are passed on the command line.
URLS = %w[
  https://www.gamesradar.com/edge
  https://www.gamesradar.com/uk/edge/
  http://www.gamesradar.com/edge
  http://www.gamesradar.com/uk/edge/
].freeze

# Command-line arguments, when present, replace the built-in URL list.
urls = ARGV.empty? ? URLS : ARGV

# Each URL is tried as given and also with its path reset to the site root.
url_variations = urls.flat_map do |url|
  [url, URI(url).tap { |u| u.path = "/" }.to_s]
end

# Candidates with a suffix appended: <url><suffix>
suffixed = SUFFIXES.flat_map do |suffix|
  url_variations.map { |url| File.join(url, suffix) }
end

# Candidates with a prefix inserted before the existing path: <host><prefix><path>
prefixed = PREFIXES.flat_map do |prefix|
  url_variations.map do |url|
    URI(url).tap { |u| u.path = File.join(prefix, u.path) }.to_s
  end
end

# Add a ".<ext>" variant of every candidate that actually has a path segment.
paths = (suffixed + prefixed).flat_map do |path|
  base = path.sub(%r{/\z}, "")
  # A bare site root has nothing to hang an extension on: appending one would
  # change the host name ("https://host" -> "https://host.xml"), so skip it.
  next [path] if URI(base).path.empty?

  [path] + EXTENSIONS.map { |ext| "#{base}.#{ext}" }
end

# Normalise away trailing slashes, then de-duplicate and sort the final list.
paths = paths.map { |path| path.sub(%r{/\z}, "") }.uniq.sort
# Result lists filled in by the worker threads below.
# NOTE(review): both arrays are appended to from several threads without a
# lock - confirm that is acceptable for the Ruby implementation in use.
successful = []
unsuccessful = []

# Alternative Typhoeus-based implementation, kept disabled for reference.
#reqs = paths.sort.uniq.map do |path|
# req = Typhoeus::Request.new(path, followlocation: true)
# req.on_complete do |res|
# if res.success? && res.code.between?(200, 400)
# successful << path
# else
# unsuccessful << "[#{res.code}] #{path}"
# end
# end
# req
#end

# Probe the candidates concurrently: one thread per slice of 5 URLs, each
# thread working through its slice sequentially. `paths` is already sorted
# and de-duplicated where it is built, so it is sliced as-is.
paths.each_slice(5).map do |slice|
  Thread.new do
    slice.each do |url|
      begin
        res = RestClient.get(url)
        if res.code.between?(200, 399)
          successful << url
        else
          unsuccessful << "[#{res.code}] #{url}"
        end
      rescue RestClient::ExceptionWithResponse => err
        # Report the HTTP status when a response is attached, else the error class.
        unsuccessful << "[#{err.response&.code || err.class}] #{url}"
      rescue StandardError => err
        # Any other failure: report the exception class instead of a bare
        # "nil" so different causes can be told apart in the output.
        unsuccessful << "[#{err.class}] #{url}"
      end
    end
  end
end.each(&:join)

#hydra = Typhoeus::Hydra.new(max_concurrency: 50)
#reqs.each{|r| hydra.queue r}
#hydra.run

# Report: failures first, then the URLs that answered successfully.
puts "ERROR"
puts unsuccessful.sort
puts "\nSUCCESS"
puts successful.sort
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment