Skip to content

Instantly share code, notes, and snippets.

@acidtib
Created December 1, 2019 07:15
Show Gist options
  • Save acidtib/edc17b16f66c7af31053d029f03e7216 to your computer and use it in GitHub Desktop.
Save acidtib/edc17b16f66c7af31053d029f03e7216 to your computer and use it in GitHub Desktop.
require "selenium-webdriver"
require "nokogiri"
require "json"
# Command-line switches handed to Chrome when the session starts.
chrome_flags = %w[
  --ignore-certificate-errors
  --disable-popup-blocking
  --disable-translate
]

options = Selenium::WebDriver::Chrome::Options.new
chrome_flags.each { |flag| options.add_argument(flag) }

# Shared state read by the methods below: the browser session and the
# profile page that will be scraped.
@driver = Selenium::WebDriver.for(:chrome, options: options)
@profile = "https://github.com/tenderlove"
# Signs in to GitHub through the browser session in @driver, waits for the
# post-login page to render, and then hands off to visitProfile.
#
# Credentials come from the G_USERNAME and G_PASSWORD environment variables.
# Relies on the top-level @driver and @profile instance variables.
#
# @raise [KeyError] if G_USERNAME or G_PASSWORD is not set
# @return whatever visitProfile returns
def login
  # Read both credentials up front so a missing variable fails with a clear
  # KeyError before the browser touches the login form.
  username = ENV.fetch("G_USERNAME")
  password = ENV.fetch("G_PASSWORD")

  @driver.navigate.to("https://github.com/login")

  puts "[INFO]: Entering username"
  @driver.find_element(:name, "login").send_keys(username)

  puts "[INFO]: Entering password"
  @driver.find_element(:name, "password").send_keys(password)

  puts "[INFO]: Logging in"
  @driver.find_element(:name, "commit").click

  # Block until the post-login page shows the repositories container before
  # navigating away. The option key must be spelled :timeout; Selenium's Wait
  # silently falls back to its default when that key is absent.
  wait = Selenium::WebDriver::Wait.new(timeout: 10)
  wait.until { @driver.find_element(:css, "div.js-repos-container") }

  puts "[INFO]: Navigating to profile #{@profile}"
  visitProfile
end
# Opens @profile in the browser, scrapes the profile fields from the rendered
# page, and writes them as pretty-printed JSON to "<username>.json" in the
# current working directory.
#
# Fields that are not present on the page are stored as nil; organizations
# without an <img> are skipped. The browser session in @driver is closed on
# the way out, whether scraping succeeded or raised.
#
# Relies on the top-level @driver and @profile instance variables.
#
# @raise [RuntimeError] if no username is found (there is nothing to name the
#   output file after)
def visitProfile
  @driver.navigate.to(@profile)

  # Wait for the profile navigation bar to appear. The option key must be
  # spelled :timeout; Selenium's Wait silently falls back to its default when
  # that key is absent.
  wait = Selenium::WebDriver::Wait.new(timeout: 10)
  wait.until { @driver.find_element(:css, "div.user-profile-nav") }

  puts "[INFO]: Scraping data"
  doc = Nokogiri::HTML(@driver.page_source)

  # Text of the first node matching the selector, or nil when nothing matches.
  text_at = ->(selector) { doc.at_css(selector)&.text }
  # Doubles every single quote (presumably SQL-style escaping for a downstream
  # consumer -- confirm before removing); nil passes through unchanged.
  double_quotes = ->(text) { text&.gsub("'", "''") }

  name     = text_at.call("h1.vcard-names span[itemprop=name]")
  username = text_at.call("h1.vcard-names span[itemprop=additionalName]")
  bio      = double_quotes.call(text_at.call("div.user-profile-bio div"))
  website  = text_at.call("li[itemprop=url] a")
  company  = double_quotes.call(text_at.call("li[itemprop=worksFor] span.p-org div"))
  email    = text_at.call("li[itemprop=email] a")

  # The avatar image may be missing; keep nil instead of failing on it.
  avatar_img = doc.at_css("img.avatar-before-user-status")
  avatar = avatar_img && avatar_img["src"]

  organizations = doc.css("a[itemprop=follows]").map do |org|
    img = org.at_css("img")
    next unless img

    { organization: img["alt"].to_s.delete("@"), img: img["src"] }
  end.compact

  # The output file is named after the username, so fail with a clear message
  # rather than a NoMethodError on nil when it could not be scraped.
  raise "could not find a username on #{@profile}" unless username

  payload = {
    name: name,
    username: username,
    website: website,
    company: company,
    email: email,
    avatar: avatar,
    bio: bio,
    organizations: organizations
  }

  file = "#{username}.json"
  puts "[INFO]: Saving payload to #{file}"
  File.write(file, JSON.pretty_generate(payload))

  puts "[DONE]:"
  # Short pause before the browser is closed (presumably so the final page
  # stays visible for a moment).
  sleep(2)
ensure
  # Always release the browser session, even when scraping raised.
  @driver.quit
end
# Script entry point: sign in to GitHub; login then calls visitProfile to
# scrape @profile.
login
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment