Created
January 23, 2019 03:05
-
-
Save kelby/a3ee9c1520f068b6e735fe98cdeebfd6 to your computer and use it in GitHub Desktop.
发明者-策略广场数据列表
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrapes the strategy listing pages at https://www.fmz.com/square and writes
# one CSV row per table row to fmz-strategies-square.csv in the current
# directory. Pages are rendered in headless Chrome through Watir and the
# resulting HTML is parsed with Nokogiri.
require 'headless'
require 'watir'
require 'csv'
require 'nokogiri'

# Prefix prepended to each strategy link's href to build the URL column.
# NOTE(review): the original script referenced `base_url` without defining it;
# this value assumes the hrefs are site-relative paths (e.g. "/strategy/...")
# on the same host as the listing pages -- confirm against the live markup.
base_url = "https://www.fmz.com"

# Upper bound on the number of listing pages requested.
max_pages = 11

browser = Watir::Browser.new :chrome, headless: true

begin
  CSV.open("fmz-strategies-square.csv", "wb") do |csv|
    max_pages.times do |page_number|
      page_url = "#{base_url}/square/-1/#{page_number + 1}?lang=zh_CN"

      # Random 1-3 second pause between page loads.
      sleep(rand(1..3))
      browser.goto page_url

      # Block until a <table> element is present in the rendered page.
      Watir::Wait.until { browser.table.exists? }

      doc = Nokogiri::HTML(browser.html)
      doc_items = doc.css("table tbody tr")
      puts "抓取到页面 #{page_number + 1}, 有数据 #{doc_items.size} 条."

      # A page with no rows ends the crawl.
      break if doc_items.empty?

      # The header row is written once, together with the first page of data.
      csv << ["名称", "标签", "被复制次数", "种类", "最后修改时间", "URL"] if page_number.zero?

      doc_items.each do |item|
        link  = item.at_css("a.ng-binding")
        cells = item.css("td")

        # Safe navigation keeps a row with a missing link/tag/cell from
        # aborting the whole crawl; the affected column is left empty instead.
        name    = link&.text
        tag     = item.at_css("span span")&.text
        counter = item.css("span.counter").text
        type    = cells[1]&.text
        time    = cells[2]&.text
        url     = [base_url, link&.attr("href")].join("")

        csv << [name, tag, counter, type, time, url]
      end
    end
  end
ensure
  # Always shut the Chrome session down, even when scraping raised.
  browser.close
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment