Created
February 12, 2020 09:42
-
-
Save hiepph/5202a8317582c1e2158b9a4e51c46cec to your computer and use it in GitHub Desktop.
Ruby selenium
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Crawl images from Google Images search using Selenium.
# Requirement: geckodriver
# Gems: selenium-webdriver, nokogiri
require 'fileutils'
require 'open-uri'
require 'securerandom'

require 'nokogiri'
require 'selenium-webdriver'
# Script body: open a Google Images search in Firefox, scroll the result page,
# then download every <img> whose src is an http(s) URL into output_dir.
query = 'single logos'
output_dir = 'logos'

# Form-encode the query so the space in it does not end up raw in the URL.
search_url = "https://www.google.com/search?#{URI.encode_www_form(tbm: 'isch', q: query)}"

# File writes below fail if the target directory is missing, so create it up front.
FileUtils.mkdir_p(output_dir)

driver = Selenium::WebDriver.for :firefox
page_source =
  begin
    driver.navigate.to search_url
    body = driver.find_element(tag_name: 'body')

    # Page down repeatedly with a short pause between key presses
    # (presumably so the page loads more results as it scrolls).
    50.times do
      body.send_keys(:page_down)
      sleep 0.3
    end

    driver.page_source
  ensure
    # Always close the browser, even when navigation or scrolling raises.
    driver.quit
  end

image_urls = Nokogiri::HTML(page_source)
                     .search('img')
                     .map { |img| img.attr('src') }
                     .compact
                     .select { |src| src.start_with?('http') }

image_urls.each do |src|
  puts src

  begin
    # URI.open is required on Ruby 3.0+, where Kernel#open no longer accepts URLs.
    # The block form closes the underlying IO once the body has been read.
    data = URI.open(src, &:read)
  rescue OpenURI::HTTPError, SocketError, Timeout::Error => e
    # One unreachable image should not abort the whole crawl.
    warn "skipping #{src}: #{e.message}"
    next
  end

  # Download first, write second: a failed download leaves no empty file behind.
  # NOTE: files are always named *.png regardless of the actual image format.
  File.binwrite(File.join(output_dir, "#{SecureRandom.uuid}.png"), data)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment