Skip to content

Instantly share code, notes, and snippets.

@mrbrutti
Created June 14, 2010 02:45
Show Gist options
  • Select an option

  • Save mrbrutti/437222 to your computer and use it in GitHub Desktop.

Select an option

Save mrbrutti/437222 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'net/http'
require 'cgi'
# Scrapes the GitHub profile pages that a Google search for
# "site:github.com intitle:Profile" returns and collects the e-mail
# addresses found on them (plain, "_at_", " at ", " @ " and " dot " forms).
#
#   git = Github.new(200)   # look at no more than 200 Google hits
#   git.search
#   git.emails_list         # => every unique address found
#   git.profiles            # => [[name, url, [emails]], ...]
class Github
  GOOGLE = "www.google.com".freeze
  GITHUB = "www.github.com".freeze
  PORT = 80
  QUERY = "/cse?q=site:github.com++intitle:Profile&hl=en&cof=&num=100&filter=0&safe=off&start=".freeze
  # Page size requested through "num=100" in QUERY.
  RESULTS_PER_PAGE = 100

  # Building blocks of the e-mail pattern.  %q is used on purpose: inside a
  # regexp (or double-quoted) literal Ruby treats "#$&" as interpolation of the
  # global $&, which would silently remove "#", "$" and "&" from the class.
  EMAIL_ATOM = %q([a-z0-9!#$&'*+=?^_`{|}~-]+).freeze
  EMAIL_LABEL = '[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'.freeze
  EMAIL_TLD = '[a-z](?:[a-z-]*[a-z])?'.freeze
  EMAIL_LOCAL = "#{EMAIL_ATOM}(?:\\.#{EMAIL_ATOM})*".freeze
  EMAIL_DOMAIN = "(?:#{EMAIL_LABEL}\\.)+#{EMAIL_TLD}".freeze

  # Alternatives are tried in this order: user_at_host, "user at host",
  # user@host, "user @ host", "user at host dot tld".
  EMAIL_PATTERN = Regexp.new(
    [
      "#{EMAIL_LOCAL}_at_#{EMAIL_DOMAIN}",
      "#{EMAIL_LOCAL}\\sat\\s#{EMAIL_DOMAIN}",
      "#{EMAIL_LOCAL}@#{EMAIL_DOMAIN}",
      "#{EMAIL_LOCAL}\\s@\\s#{EMAIL_DOMAIN}",
      "#{EMAIL_ATOM}(?:\\sdot\\s#{EMAIL_ATOM})*\\sat\\s(?:#{EMAIL_LABEL}\\sdot\\s)+#{EMAIL_TLD}"
    ].join("|"),
    Regexp::IGNORECASE
  )

  attr_accessor :emails_list, :profiles

  # @param hits [Integer, String, nil] maximum number of Google hits to walk
  #   through; -1 or nil means "as many as Google reports".
  def initialize(hits = -1)
    @profiles = []
    @emails_list = []
    @start = 0
    # An explicit nil (e.g. a missing ARGV entry) must not become a limit of 0.
    @totalhits = hits.nil? ? -1 : hits.to_i
  end

  # Fetches Google result pages one after another, crawling every profile
  # linked from each page, until the hit limit is reached or a request fails.
  # Results accumulate in #profiles and #emails_list.
  def search
    loop do
      more = false
      get(GOOGLE, PORT, "#{QUERY}#{@start}") do |res|
        parse(res.body)
        @start += RESULTS_PER_PAGE
        # Google wraps matched terms in <em>/<b>; drop them so URLs are contiguous.
        crawl(res.body.gsub(%r{</?(?:em|b)>}, "")) unless @totalhits.zero?
        more = @totalhits > @start
      end
      break unless more

      # Pause between result pages.
      sleep(2)
    end
  end

  private

  # Performs a GET for +query+ on +url+:+port+ and yields the response when it
  # is a success or a redirection.  Failures are reported on stdout and not
  # re-raised.
  def get(url, port, query, &block)
    Net::HTTP.new(url, port).start do |http|
      response = http.request(Net::HTTP::Get.new(query))
      case response
      when Net::HTTPSuccess, Net::HTTPRedirection
        block.call(response)
      else
        response.error!
      end
    end
  rescue Net::HTTPExceptions
    puts "Error: Something went wrong with the HTTP request"
  rescue StandardError => e
    puts "Error: Something went wrong :( #{e.message}"
  end

  # Downloads +uri_str+, following at most +limit+ redirects, and yields the
  # body of the final successful response.  Best effort: any failure is
  # reported on stderr and nil is returned.
  def get_profile(uri_str, limit = 10, &block)
    raise ArgumentError, 'HTTP redirect too deep' if limit == 0

    response = Net::HTTP.get_response(URI.parse(uri_str))
    case response
    when Net::HTTPSuccess
      block.call(response.body)
    when Net::HTTPRedirection
      get_profile(response['location'], limit - 1, &block)
    else
      response.error!
    end
  rescue StandardError => e
    warn "Skipping #{uri_str}: #{e.message}"
    nil
  end

  # Reads the total hit count from a result page such as
  #   Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
  # and lowers @totalhits to it.  A page without a count sets @totalhits to 0.
  def parse(html)
    hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
    if hits.empty?
      @totalhits = 0
    else
      realhits = hits[0][0].gsub(",", "").to_i
      @totalhits = realhits if @totalhits == -1 || @totalhits > realhits
    end
  end

  # Visits every GitHub profile linked from +text+ and records
  # [name, url, emails] in @profiles.  Each profile is fetched only once.
  def crawl(text)
    # [\w-] because profile names may contain hyphens.
    names = text.scan(%r{"http://github\.com/([\w-]+)}).flatten.uniq
    names.each do |name|
      next if @profiles.any? { |known| known[0] == name }

      url = "http://#{GITHUB}/#{name}"
      get_profile(url) { |page| @profiles << [name, url, emails(page)] }
    end
  end

  # Extracts the unique e-mail addresses from +text+, prints them, adds them to
  # @emails_list and returns them.
  def emails(text)
    # Some emails are URI-encoded inside a script snippet; decode them first.
    decoded = text.gsub(/eval\(decodeURIComponent\('.*'\)\)/) { |a| CGI.unescape(a) }
    # uniq (not uniq!): the bang form returns nil when nothing was removed.
    c_list = fix(decoded.scan(EMAIL_PATTERN)).uniq
    c_list.each { |e| puts e }
    @emails_list.concat(c_list).uniq!
    c_list
  end

  # Rewrites obfuscated addresses in place into the user@host.tld form and
  # strips phone-number-like digit runs.  Returns +list+.
  def fix(list)
    list.each do |e|
      e.gsub!(/\s(?:at|@)\s|_at_/i, "@")
      e.gsub!(/\sdot\s/i, ".")
      e.gsub!(/[+0-9]{0,3}[0-9()]{3,5}[-]{0,1}[0-9]{3,4}[-]{0,1}[0-9]{3,5}/, "")
    end
  end
end
# Command line entry point: ARGV[0] is handed to Github.new as the hit limit,
# ARGV[1] (optional) is the path of a comma separated report to write.
banner = lambda do |title|
  puts "---------------------------------------------"
  puts title
  puts "---------------------------------------------"
end

git = Github.new(ARGV[0])
banner.call(" Searching #{ARGV[0]} ")
git.search
banner.call(" Final Results ")
git.emails_list.each { |email| puts email }
if ARGV[1]
  banner.call(" Saving Files into #{ARGV[1]} ")
  # Block form closes the file even when writing fails.
  File.open(ARGV[1], "w") do |out|
    out << "PROFILE, URL, EMAILS\n"
    git.profiles.each do |name, url, emails|
      # join, not Array#to_s: to_s returns the inspect form on Ruby >= 1.9.
      out << "#{name.upcase},#{url},#{emails.uniq.join('|')}\n"
    end
  end
end
banner.call(" HAPPY HACKING !!! :) ")
@MayaMale
Copy link

Is this still working?

@mrbrutti
Copy link
Author

It should, with a few changes. I will update the Google result parsing and the GitHub profile parser, and it should be working again in no time.

@MayaMale
Copy link

Great! I was going crazy trying to understand why it's not working right now :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment