Created
December 30, 2014 11:32
-
-
Save kusayuzayushko/28dd03382364aae0d42b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'net/telnet'
require 'tor_requests'
require 'json'
# Asks the local Tor daemon for a fresh circuit by sending AUTHENTICATE and
# then "signal NEWNYM" over a telnet-style connection to localhost.
#
# NOTE(review): the control password "hi" is hard-coded; it presumably has to
# match the password configured for the local Tor daemon -- confirm.
#
# @param port [String, Integer] Tor control port. Defaults to 9051, the
#   conventional ControlPort; 9050 (used previously) is Tor's default SOCKS
#   port, which does not speak the control protocol.
# @return [nil]
# @raise [RuntimeError] if either command is answered with anything other
#   than "250 OK\n"
def switch_endpoint(port = "9051")
  localhost = Net::Telnet.new("Host" => "localhost", "Port" => port, "Timeout" => 10, "Prompt" => /250 OK\n/)
  begin
    localhost.cmd('AUTHENTICATE "hi"') do |reply|
      print reply
      # raise, not throw: throw without a matching catch only produces an
      # unrelated UncaughtThrowError.
      raise "Cannot authenticate to Tor" unless reply == "250 OK\n"
    end
    localhost.cmd('signal NEWNYM') do |reply|
      print reply
      raise "Cannot switch Tor to new route" unless reply == "250 OK\n"
    end
  ensure
    # Close the connection even when one of the commands above failed.
    localhost.close
  end
  nil
end
# Point tor_requests at the local Tor proxy and set the User-Agent header
# sent with every request.
Tor.configure do |config|
  config.ip = "127.0.0.1"
  # 9050 is Tor's default SocksPort. 9051 (used previously) is conventionally
  # the control port, which cannot proxy HTTP traffic.
  # NOTE(review): adjust if the local torrc uses a non-default SocksPort.
  config.port = 9050
  config.add_header('User-Agent', 'Chrome version 29')
end
# Query-string fragments for the search request. ajax_search concatenates
# them in a fixed order, so each fragment carries its own leading "&".

# Number of results per page; feeds both res_size and the paging step.
@search_size = 8

# NOTE(review): the original comment here described the host-language
# argument (falls back to the Accept-Language header, then to 'en'). In
# Google's AJAX Search API that argument is "hl", whereas "gl" sent below is,
# as far as the API docs go, the country-targeting argument -- confirm which
# one was intended.
@res_lang = "&gl=ru"

# This argument supplies the query, or search expression, that is passed into the searcher.
res_query = "&q=test+search"

# This optional argument supplies the number of results that the application would like to receive.
# Values can be any integer between 1 and 8.
# Alternately, a value of small indicates a small result set size or 4 results.
# A value of large indicates a large result set or 8 results.
@res_size = "&rsz=#{@search_size}"

# This optional argument controls turning on or off the duplicate content filter:
# filter=0 - Turns off the duplicate content filter
# filter=1 - Turns on the duplicate content filter (default)
@res_filter = "&filter=1"

# This optional argument supplies the search safety level which may be one of:
# safe=active - enables the highest level of safe search filtering
# safe=moderate - enables moderate safe search filtering (default)
# safe=off - disables safe search filtering
@res_safe = "&safe=off"

# This optional argument supplies the start index of the first search result (depends on res_size).
# Each successful response contains a cursor object which includes an array of pages.
# The start property for a page may be used as a valid value for this argument.
# The numeric offset is appended by ajax_search.
@res_page = "&start="

# Collected result URLs.
@urls = []
# Pages through http://ajax.googleapis.com/ajax/services/search/web via
# Tor::HTTP and appends the "url" of every result to @urls.
#
# Start offsets 0, @search_size, 2 * @search_size, ... below 64 are requested
# in turn. The request path is built from @res_lang, @res_size, @res_filter,
# @res_safe, the query, @res_page and the offset, in that order.
#
# @param query [String] ready-to-append query fragment, e.g. "&q=test+search"
# @param max_retries [Integer] how many times a failing page is re-requested
#   (with a switch_endpoint call before each retry) before giving up
# @return [nil]
# @raise [RuntimeError] if a page still fails after max_retries retries
def ajax_search(query, max_retries = 10)
  prefix = "/ajax/services/search/web?v=1.0" \
           "#{@res_lang}#{@res_size}#{@res_filter}#{@res_safe}#{query}#{@res_page}"

  (0...64).step(@search_size) do |start|
    path = "#{prefix}#{start}"
    puts "requesting http://ajax.googleapis.com#{path}"
    fetch_search_page(path, max_retries).each { |result| @urls.push(result["url"].to_s) }
  end
  nil
end

# Fetches one result page and returns its "results" array, switching the Tor
# endpoint and retrying while the reply is unusable.
def fetch_search_page(path, max_retries)
  retries = 0
  loop do
    response = Tor::HTTP.get('ajax.googleapis.com', path, 80)
    reply = begin
      JSON.parse(response.body)
    rescue JSON::ParserError
      {} # a non-JSON body is handled like any other failed reply
    end
    reply = {} unless reply.is_a?(Hash)

    data = reply["responseData"]
    # Check the reply BEFORE touching data["results"]: failed replies may
    # carry a null responseData. to_i makes the status check work whether the
    # status arrives as a JSON number or as a string.
    failed = reply["responseStatus"].to_i == 400 || data.nil? || data.empty?
    return Array(data["results"]) unless failed

    puts reply["responseDetails"]
    raise "Search request failed after #{max_retries} retries: #{path}" if retries >= max_retries

    retries += 1
    switch_endpoint
  end
end
# Run the search and print each distinct result URL on its own line.
ajax_search(res_query)
@urls.uniq.each { |url| puts url }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment