Skip to content

Instantly share code, notes, and snippets.

@kusayuzayushko
Created December 30, 2014 11:32
Show Gist options
  • Save kusayuzayushko/28dd03382364aae0d42b to your computer and use it in GitHub Desktop.
Save kusayuzayushko/28dd03382364aae0d42b to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'net/telnet'
require 'tor_requests'
require 'json'
# Asks the local Tor daemon to switch to a new route by sending it
# `signal NEWNYM` over a Telnet session.
#
# The session connects to localhost:9050, authenticates with the password
# "hi", sends the signal and is always closed afterwards, even when one of
# the commands fails. Every chunk of the daemon's reply is echoed to stdout.
#
# NOTE(review): 9050 is Tor's default SocksPort, while AUTHENTICATE / SIGNAL
# belong to the control protocol (default ControlPort 9051) -- confirm this
# port against the local torrc.
#
# @raise [RuntimeError] when Tor answers either command with anything other
#   than "250 OK\n"
# @return [void]
def switch_endpoint
  control = Net::Telnet.new("Host" => "localhost", "Port" => "9050", "Timeout" => 10, "Prompt" => /250 OK\n/)
  begin
    [
      ['AUTHENTICATE "hi"', "Cannot authenticate to Tor"],
      ['signal NEWNYM', "Cannot switch Tor to new route"]
    ].each do |command, failure|
      control.cmd(command) do |reply|
        print reply
        # `raise`, not `throw`: throw is catch/throw control flow and would
        # surface as an UncaughtThrowError instead of a normal error.
        raise failure if reply != "250 OK\n"
      end
    end
  ensure
    # Release the socket on the failure path as well.
    control.close
  end
end
# Configure the tor_requests client: the address and port it connects to, and
# the User-Agent header sent with each request.
# NOTE(review): presumably this is the Tor SOCKS proxy. 9051 is Tor's default
# ControlPort and 9050 its default SocksPort, while switch_endpoint opens its
# control session on 9050 -- confirm the two ports are not swapped.
Tor.configure do |tor|
  tor.ip = "127.0.0.1"
  tor.port = 9051
  tor.add_header('User-Agent', 'Chrome version 29')
end
# Number of results requested per page. It is interpolated into @res_size
# below, and ajax_search advances its start offset by this amount.
@search_size = 8

# Optional. Described as the host language of the application making the
# request; when omitted the service picks a value from the Accept-Language
# HTTP header, and assumes 'en' if that header is missing as well.
# NOTE(review): that description reads like the API's `hl` argument, yet the
# value sent here is `gl` -- confirm which one is intended.
@res_lang = "&gl=ru"

# The query (search expression) handed to the searcher.
res_query = "&q=test+search"

# Optional. How many results the application would like to receive: any
# integer from 1 to 8, or the keywords `small` (4 results) and `large`
# (8 results).
@res_size = "&rsz=#{@search_size}"

# Optional. Duplicate-content filter switch:
#   filter=0 - filter off
#   filter=1 - filter on (default)
@res_filter = "&filter=1"

# Optional. Safe-search level:
#   safe=active   - highest level of filtering
#   safe=moderate - moderate filtering (default)
#   safe=off      - no filtering
@res_safe = "&safe=off"

# Optional. Start index of the first search result (depends on the page size).
# Every successful response contains a cursor object with an array of pages;
# the `start` property of a page is a valid value for this argument.
# ajax_search appends the actual offset to this prefix.
@res_page = "&start="

# Collects the result URLs gathered by ajax_search.
@urls = []
# Upper bound on Tor endpoint switches attempted for a single result page
# before ajax_search stops asking for further pages.
AJAX_SEARCH_MAX_SWITCHES = 10

# Builds the request path for one result page from the shared @res_* query
# fragments, the caller's query fragment and the start offset.
def ajax_search_path(query, start)
  "/ajax/services/search/web?v=1.0" \
    "#{@res_lang}#{@res_size}#{@res_filter}#{@res_safe}#{query}#{@res_page}#{start}"
end

# Requests +path+ from ajax.googleapis.com (port 80) through Tor and returns
# the parsed JSON reply.
def ajax_search_fetch(path)
  JSON.parse(Tor::HTTP.get('ajax.googleapis.com', path, 80).body)
end

# True when the reply has no usable result set: status 400, or a
# "responseData" member that is null or empty.
def ajax_search_failed?(reply)
  payload = reply["responseData"]
  # The status is compared as text so it matches whether the JSON carries it
  # as a number or as a string.
  reply["responseStatus"].to_s == "400" || payload.nil? || payload.empty?
end

# Walks the result pages for +query+ (start offsets 0 up to, but excluding,
# 64, in steps of @search_size) and appends every result URL to @urls.
#
# A failed reply is checked *before* its results are touched: its
# "responseDetails" are printed, the Tor endpoint is switched and the same
# page is requested again. After AJAX_SEARCH_MAX_SWITCHES unsuccessful
# switches a warning goes to stderr and the search stops, keeping whatever
# was collected so far.
#
# @param query [String] ready-to-append query fragment, e.g. "&q=test+search"
# @return [nil]
def ajax_search(query)
  (0...64).step(@search_size) do |start|
    path = ajax_search_path(query, start)
    puts "requesting http://ajax.googleapis.com#{path}"
    reply = ajax_search_fetch(path)

    switches = 0
    while ajax_search_failed?(reply)
      puts reply["responseDetails"]
      if switches >= AJAX_SEARCH_MAX_SWITCHES
        warn "ajax_search: giving up at start=#{start} after #{switches} endpoint switches"
        return
      end
      switch_endpoint
      switches += 1
      reply = ajax_search_fetch(path)
    end

    Array(reply["responseData"]["results"]).each { |result| @urls.push(result["url"].to_s) }
  end
  nil
end
# Run the search, then print each distinct collected URL on its own line.
ajax_search(res_query)
distinct_urls = @urls.flatten.uniq
distinct_urls.each do |link|
  puts link
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment