Skip to content

Instantly share code, notes, and snippets.

@ybenjo
Created April 23, 2011 09:05
Show Gist options
  • Save ybenjo/938486 to your computer and use it in GitHub Desktop.
Save ybenjo/938486 to your computer and use it in GitHub Desktop.
crawl mtg wiki
require "nokogiri"
require "open-uri"
require "progressbar"
# Fetches a single card page and extracts its title and flavor text.
#
# The title is the inner text of the nodes found by searching "td.mc" and
# then "b"; the flavor text is the inner text of the nodes found by searching
# "i", with newline, carriage-return and tab characters removed.
#
# @param card_url [String] URL of the card page to download
# @return [Array(String, String)] the card title followed by the flavor text
def parse_flavor_text(card_url)
  # URI.open rather than Kernel#open: Ruby 3.0 removed open-uri's Kernel#open
  # override, and Kernel#open would spawn a command for a string starting
  # with "|". The block form closes the handle once parsing is done.
  page = URI.open(card_url) { |io| Nokogiri(io) }
  card_title = (page / "td.mc" / "b").inner_text
  flavor_text = (page / "i").inner_text.delete("\n\r\t")
  [card_title, flavor_text]
end
# Fetches one search-result page and collects the link targets of its cards.
#
# @param url [String] URL of the search-result page to download
# @return [Array] the "href" attribute of each node found by searching
#   "div.card" and then "a", in the order the search yields them
def get_card_url_list(url)
  # URI.open rather than Kernel#open: Ruby 3.0 removed open-uri's Kernel#open
  # override. The block form closes the handle once parsing is done.
  page = URI.open(url) { |io| Nokogiri(io) }
  (page / "div.card" / "a").map { |link| link["href"] }
end
# Script entry point: walks the paginated search results, downloads every
# card page and writes one "<flavor text> - <card title>" line per card to
# ./mtg_card.txt.
if $PROGRAM_NAME == __FILE__
  # Values for the repeated "set[]" query parameter, in request order. They
  # are kept as a list so the URL is assembled without the embedded line
  # breaks that a wrapped string literal would put into it.
  set_codes = %w[
    SOM MBS %3F%3F%3F ZEN WWK ROE ALA CON ARB SHM EVE LRW MOR TSP TSB PLC
    FUT RAV GPT DIS CHK BOK SOK MRD DST 5DN ONS LGN SCG ODY TOR JUD INV PLS
    APC MMQ NEM PCY USG ULG UDS TMP STH EXO MIR VIS WTH
  ]
  set_query = set_codes.map { |code| "set[]=#{code}" }.join("&")
  # The page number is appended to this prefix for each request.
  search_url = "http://whisper.wisdom-guild.net/search.php?sort=name_en&#{set_query}&page="

  # NOTE(review): 9257 and 185 are hard-coded; presumably the card count and
  # result-page count of this search when the script was written - confirm.
  progress_bar = ProgressBar.new("Crawl", 9257)
  File.open("./mtg_card.txt", "w") do |f|
    1.upto(185) do |i|
      get_card_url_list("#{search_url}#{i}").each do |card_url|
        card_title, flavor_text = parse_flavor_text(card_url)
        # Pause 10 seconds after every card fetch (presumably to avoid
        # hammering the remote server).
        sleep(10)
        progress_bar.inc
        # Cards missing a title or flavor text are counted but not written.
        next if card_title == "" || flavor_text == ""

        f.puts "#{flavor_text} - #{card_title}"
      end
    end
  end
  progress_bar.finish
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment