Skip to content

Instantly share code, notes, and snippets.

@clausd
Created June 19, 2015 15:48
Show Gist options
  • Select an option

  • Save clausd/f7d10bb5858d4b4125f5 to your computer and use it in GitHub Desktop.

Select an option

Save clausd/f7d10bb5858d4b4125f5 to your computer and use it in GitHub Desktop.
Valgskrab
require 'mechanize'
require 'logger'
require 'json'
# Entry page of the crawl; get_all_voting_stations fetches this URL first.
# Frozen so the shared constant cannot be modified in place by accident.
ROOT = "http://www.kmdvalg.dk/main".freeze
# Builds the HTTP agent used for every request of the crawl.
#
# The agent writes its log to "mech.log" (relative to the current working
# directory) and uses Mechanize's 'Mac Safari' user-agent alias.
#
# @return [Mechanize] the configured agent
def get_agent
  Mechanize.new.tap do |browser|
    browser.log = Logger.new "mech.log"
    browser.user_agent_alias = 'Mac Safari'
  end
end
# Crawls the site starting at ROOT and collects the tables of every result page.
#
# The crawl is a breadth-first walk over a queue of hrefs:
# * it is seeded with every link on the ROOT page whose href contains 'fv';
# * for each fetched link, the first "F<digits>" token in the href becomes the
#   current prefix, and every link on the fetched page whose href contains
#   that prefix is queued (once);
# * a link containing "F" followed by eight digits is treated as a result page
#   (presumably one voting station — confirm against the site) and recorded.
#
# Links without an href are ignored, and every href is queued at most once,
# including the seed links.
#
# @return [Array<Array>] one [href, fetch Time, tables] triple per result
#   page, where tables is whatever get_tables returns for that page
def get_all_voting_stations
  agent = get_agent
  front_page = agent.get ROOT
  results = []
  # hrefs that have already been queued; keeps pages from being fetched twice
  searched = {}

  # Anchors without an href report nil, so drop those before matching.
  searches = front_page.links.map { |l| l.href }.compact
                       .select { |href| href.include?('fv') }.uniq
  # Mark the seeds as queued so back-links to them are not crawled again.
  searches.each { |href| searched[href] = true }

  # NOTE(review): when a link has no "F<digits>" token, the prefix of the
  # previously fetched link stays in effect (behavior kept from the original);
  # confirm whether that carry-over is intended.
  prefix = nil
  while (link = searches.shift)
    res = agent.get link
    good_link = link.match(/(F[0-9]+)/)
    prefix = good_link[1] if good_link
    if prefix
      res.links.each do |l|
        href = l.href
        next unless href && href.include?(prefix) && !searched[href]
        searched[href] = true
        searches << href
      end
    end
    p prefix # progress output
    # sleep 1 # I gave up being nice
    results.push [link, Time.now, get_tables(res)] if link =~ /F[0-9]{8}/
  end
  results
end
# Extracts the cell texts of every table on a page.
#
# Only <td> cells are read, so a row without any <td> yields an empty list.
#
# @param page [#search] a fetched page (anything that answers `search` with
#   a selector and returns nodes that do the same)
# @return [Array<Array<Array<String>>>] one entry per <table>; each entry is
#   a list of rows, each row a list of its cell texts
def get_tables(page)
  page.search('table').map do |table|
    table.search('tr').map do |row|
      row.search('td').map(&:text)
    end
  end
end
# Script entry point: crawl every result page, then write the data rows of
# each page's second table to 'stemmesteder.csv', one row per line with the
# cells separated by ';'.
res = get_all_voting_stations
# Every result is a [link, fetch time, tables] triple.
lines = res.flat_map do |_link, _fetched_at, tables|
  # Rows come from the second table on the page, minus its first row
  # (presumably a header — confirm against the site). A page that lacks such
  # a table, or whose table is empty, contributes no lines instead of
  # aborting the whole export with a NoMethodError on nil after the crawl.
  data_rows = (tables[1] || [])[1..-1] || []
  data_rows.map { |cells| cells.join(';') }
end
File.open('stemmesteder.csv', 'w') { |f| f.write lines.join("\n") }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment