Created
June 19, 2015 15:48
-
-
Save clausd/f7d10bb5858d4b4125f5 to your computer and use it in GitHub Desktop.
Valgskrab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'mechanize' | |
| require 'logger' | |
| require 'json' | |
| ROOT = "http://www.kmdvalg.dk/main" | |
# Builds the HTTP client used for scraping.
#
# The returned Mechanize instance logs to the file "mech.log" and sends the
# 'Mac Safari' user-agent alias with its requests.
#
# Returns the configured Mechanize agent.
def get_agent
  Mechanize.new.tap do |browser|
    browser.log = Logger.new "mech.log"
    browser.user_agent_alias = 'Mac Safari'
  end
end
# Crawls the site rooted at ROOT and collects the tables of every visited page
# whose URL contains "F" followed by eight digits.
#
# The crawl is a queue-driven walk:
#   1. The ROOT page is fetched and every link whose href contains 'fv' seeds
#      the queue.
#   2. For each queued URL, the first "F<digits>" run in the URL becomes the
#      prefix; links on the fetched page whose href contains that prefix are
#      queued if they have not been queued before.
#   3. URLs containing "F" + 8 digits contribute a result entry.
#
# agent - optional object responding to #get(url) and returning a page that
#         responds to #links (defaults to a fresh agent from get_agent).
#
# Returns an Array of [url, fetch_time, tables] triples, where tables is
# whatever get_tables returns for the fetched page.
def get_all_voting_stations(agent = get_agent)
  front_page = agent.get ROOT
  results = []
  searched = {} # every URL that has ever been queued
  searches = [] # FIFO queue of URLs still to fetch

  # Queue a page's not-yet-seen links whose href contains `needle`. Marking
  # and filtering happen in the same pass so a URL repeated on one page is
  # only queued once; links without an href are skipped.
  enqueue = lambda do |page, needle|
    page.links.each do |l|
      href = l.href
      next unless href && href.include?(needle) && !searched[href]
      searched[href] = true
      searches << href
    end
  end

  enqueue.call(front_page, 'fv')

  # No delay between requests: the original author disabled a 1-second sleep.
  until searches.empty?
    link = searches.shift
    res = agent.get link

    # Recomputed for every URL so a prefix from a previous page never leaks
    # into a page whose own URL has no "F<digits>" part.
    good_link = link.match(/(F[0-9]+)/)
    prefix = good_link && good_link[1]
    enqueue.call(res, prefix) if prefix
    p prefix

    if link.match(/F[0-9]{8}/)
      results.push [link, Time.now, get_tables(res)]
    end
  end
  results
end
# Extracts the cell text of every table on a page.
#
# page - object responding to #search; each 'table' match must respond to
#        #search for 'tr', each row to #search for 'td', and each cell to #text.
#
# Returns a nested Array: one entry per table, holding one Array per row,
# holding the text of each 'td' cell in that row.
def get_tables(page)
  page.search('table').map do |table|
    table.search('tr').map do |row|
      row.search('td').map(&:text)
    end
  end
end
# Run the crawl, then write the data rows of each page's second table to
# 'stemmesteder.csv', one row per line with cells separated by ';'.
res = get_all_voting_stations

csv_lines = res.flat_map do |_link, _fetched_at, tables|
  # tables[1] is the second table on the page; its first row is dropped.
  # Pages with fewer than two tables, or with an empty second table, are
  # skipped instead of raising after the whole crawl has completed.
  rows = tables[1]
  data_rows = rows && rows[1..-1]
  next [] if data_rows.nil?
  data_rows.map { |cells| cells.join(';') }
end

File.open('stemmesteder.csv', 'w') { |f| f.write csv_lines.join("\n") }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment