-
-
Save pifleo/3889803 to your computer and use it in GitHub Desktop.
Proxy List Fetcher Using Ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ chmod +x ./proxy_list_fetcher.rb
$ ./proxy_list_fetcher.rb | head
180.94.88.58 8080 Afghanistan fast HTTPS High +KA
187.87.155.2 8080 Brazil fast HTTPS High +KA
120.132.132.119 8080 China medium HTTPS High +KA
210.211.109.244 8081 Viet Nam fast HTTP Low
216.244.71.143 3128 United States medium HTTPS High +KA
190.237.224.22 8080 Peru slow HTTPS High +KA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# wget -qO- http://hidemyass.com/proxy-list/index.php | ./proxy_list_fetcher.rb | head
# http://evilzone.org/security-tools/(bubzuru)-hidemyass-proxy-list-grabber-(alpha)/
# proxy list: http://www.hidemyass.com/proxy-list/
require 'nokogiri' | |
require 'open-uri' | |
# Alternative input: parse a page supplied on stdin / as a file argument
# instead of fetching it over HTTP.
#doc = Nokogiri::HTML(ARGF.read)

# Page whose "listtable" table holds one proxy per row.
PROXY_LIST_URL = 'http://hidemyass.com/proxy-list/index.php'

# Decides whether an element inside the address cell contributes to the
# address: it must carry an inline style containing "inline", a class listed
# in +visible_classes+, or a class that starts with a digit.
#
# @param node [Nokogiri::XML::Node] a span taken from the address cell
# @param visible_classes [Array<String>] class names whose CSS rule mentions "inline"
# @return [Boolean]
def visible_fragment?(node, visible_classes)
  css_class = node['class']
  node['style'].to_s.include?('inline') ||
    visible_classes.include?(css_class) ||
    css_class.to_s.match?(/^[0-9]/)
end

# Rebuilds the address string from the address cell.
#
# The cell embeds a <style> element; every class whose rule mentions "inline"
# is treated as visible. Bare text nodes are always kept, element nodes only
# when +visible_fragment?+ accepts them (presumably the page interleaves
# hidden decoy spans with the real octets -- confirm against the live markup).
#
# @param td [Nokogiri::XML::Node] the second cell of a proxy row
# @return [String] the collected fragments joined with single dots
def extract_address(td)
  css = td.at_xpath('span/style/text()').to_s
  visible_classes = css.split.map { |rule| rule[/\.(.+?)\{.*inline/, 1] }.compact

  fragments = []
  td.xpath('span/span | span | span/text()').each do |node|
    if node.is_a?(Nokogiri::XML::Text)
      # Drops one leading dot; a trailing dot survives and is collapsed below.
      text = node.content.strip[/\.{0,1}(.+)\.{0,1}/, 1]
      fragments << text if text
    elsif visible_fragment?(node, visible_classes)
      fragments << node.content
    end
  end

  # Fragments may carry their own dots, so squash any run of dots into one.
  fragments.join('.').gsub(/\.+/, '.')
end

# Converts one table row into a hash describing the proxy.
#
# Cells are addressed by position; cells 0 and 5 are ignored.
#
# @param row [Nokogiri::XML::Node] a <tr> of the proxy table
# @return [Hash{Symbol => String, nil}] keys :addr, :port, :country,
#   :response_time, :speed, :type, :anonymity; empty when the row has no <td>
def parse_proxy_row(row)
  proxy = {}
  row.xpath('td').each_with_index do |td, index|
    case index
    when 1 then proxy[:addr] = extract_address(td)
    when 2 then proxy[:port] = td.content.strip
    when 3 then proxy[:country] = td.content.strip
    when 4
      # Safe navigation: a cell without the expected <div> yields nil
      # instead of raising NoMethodError.
      proxy[:response_time] = td.at_xpath('div')&.[]('rel')
      proxy[:speed] = td.at_xpath('div/div')&.[]('class')
    when 6 then proxy[:type] = td.content.strip
    when 7 then proxy[:anonymity] = td.content.strip
    end
  end
  proxy
end

# URI.open is required on Ruby 3.0+, where open-uri no longer patches
# Kernel#open; the block form closes the downloaded stream once parsed.
doc = URI.open(PROXY_LIST_URL) { |page| Nokogiri::HTML(page) }
rows = doc.xpath('//table[@id="listtable"]/tr')

# Rows without any <td> produce an empty hash; skip them so they do not
# print as a line of bare tabs.
results = rows.map { |row| parse_proxy_row(row) }.reject(&:empty?)

results.each do |res|
  puts res.values_at(:addr, :port, :country, :speed, :type, :anonymity).join("\t")
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment