Created
June 17, 2013 02:13
-
-
Save soasme/5794268 to your computer and use it in GitHub Desktop.
This script saves the words listed at http://www.shanbay.com/wordlist/23/15930/ into a local SQLite database (words.db).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require 'nokogiri' | |
require 'open-uri' | |
require 'sqlite3' | |
# Open (or create) the local SQLite database file that holds the scraped words.
# The handle is kept in a global because fetch_page reads it as $db.
$db = SQLite3::Database.new "words.db"

# Create the words table on first run. "if not exists" makes re-runs a no-op
# without rescuing SQLite3::SQLException wholesale, so genuine SQL errors
# still surface instead of being silently ignored.
$db.execute <<-SQL
  create table if not exists words (
    id integer PRIMARY KEY autoincrement,
    word varchar(50),
    meaning varchar(100)
  );
SQL
# Downloads one page of the word list and stores every word on it that is not
# already present in the words table of $db.
#
# For each row it prints "<inserted?> <word> : <meaning>", where the first
# field is true when the word was newly inserted and false when it already
# existed.
#
# @param id [Integer, #to_s] page number, interpolated into the ?page= query
# @return [Object] whatever the row iteration (`each`) returns
def fetch_page(id)
  url = "http://www.shanbay.com/wordlist/23/15930/?page=#{ id }"

  # Kernel#open no longer accepts URLs on Ruby 3.0+; URI.open (provided by
  # open-uri) is the supported entry point.
  html = URI.open(url, &:read)
  doc = Nokogiri::HTML(html)

  doc.css('table.table-striped .row').each do |row|
    word = row.css('.span2 strong').inner_text
    meaning = row.css('.span10').inner_text

    # Look the word up first so an already-stored word is not inserted again.
    record = $db.get_first_row("select * from words where word=?", word)
    is_new = record.nil?
    $db.execute("insert into words(word, meaning) values(?,?)", word, meaning) if is_new

    puts "#{ is_new } #{ word } : #{ meaning }"
  end
end
# Scrape pages 1 through 578 in order, printing each page number before
# fetching it so progress is visible on stdout.
1.upto(578) do |page|
  puts page
  fetch_page(page)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment