# Created: June 26, 2011 15:05
# UOC scraping with standalone Capybara
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'capybara'
require 'capybara/dsl'
require 'akephalos'

# Standalone Capybara configuration: this script drives a remote site rather
# than a local Rack application.
Capybara.run_server = false               # do not boot an application server
Capybara.current_driver = :akephalos      # use the akephalos driver for this session
Capybara.app_host = 'http://cv.uoc.edu'   # host used when visiting paths such as '/'
module MyCapybaraTest
  # Logs in to the site configured in Capybara.app_host, walks a chain of
  # nested frames down to a menu frame, follows every menu link whose text
  # matches GRADE_LINK_TEXT and prints the grade table found behind each one.
  class Test
    # `include Capybara` is deprecated in Capybara 1.x and provides no DSL
    # methods in 2.x; prefer Capybara::DSL and fall back to the bare module
    # only on releases that predate it.
    include(defined?(Capybara::DSL) ? Capybara::DSL : Capybara)

    # Absolute URL of the navigation frameset opened right after logging in.
    NAVIGATION_URL = 'http://cv.uoc.edu/rb/inici/navigation/main/35309'.freeze

    # Text identifying the menu links to follow.
    # NOTE(review): Catalan, presumably "Submission and record" - confirm.
    GRADE_LINK_TEXT = /Lliurament i registre/

    # Number of leading rows of the grade table that are skipped.
    # NOTE(review): presumably header rows - confirm against the live page.
    LEADING_ROWS_TO_SKIP = 4

    # Visual separator printed around each page dump.
    SEPARATOR = ('***' * 20).freeze

    # Runs the whole scrape and prints the collected grades.
    #
    # @param username [String] value typed into the login field named 'l'
    # @param password [String] value typed into the login field named 'p'
    # @return [nil] the grades are printed with `puts`, not returned
    def test_uoc(username = 'xxxx', password = 'xxxx')
      log_in(username, password)
      enter_menu_frame

      links = grade_links
      p 'Clicking links.....'
      notes = scrape_notes(links)
      puts notes.inspect
      # Disabled alternative navigation, kept for reference:
      # url = page.find(:css, 'frame[name=planaInici]')[:src];
      # visit url
      # link = page.find(:css, '#grid_right_navigation_old_page_item');
      # link.click
      # page.find(:css, 'a.fntmnsel1').click
    end

    private

    # Fills in the login form on the start page and submits it by triggering
    # a click on the form's image input through JavaScript.
    def log_in(username, password)
      p 'Logging in...'
      visit('/')
      within 'form[name=loginForm]' do
        fill_in 'l', with: username
        fill_in 'p', with: password
      end
      page.execute_script("$('input.img').trigger('click');")
    end

    # Visits each nested frame's document in turn, ending on the menu frame.
    def enter_menu_frame
      p 'Entering navigation frame...'
      visit NAVIGATION_URL
      p 'Entering main frame...'
      visit frame_src('main')
      p 'Entering class frame...'
      visit rooted_path(frame_src('aula'))
      p 'Entering menu frame...'
      visit frame_src('cl_menu')
    end

    # Returns the `src` attribute of the <frame> with the given name on the
    # current page.
    def frame_src(name)
      page.find(:css, "frame[name=#{name}]")[:src]
    end

    # Replaces the first '/'-separated segment of +src+ with an empty one, so
    # the result starts with '/'.
    # NOTE(review): presumably the src is relative (e.g. '../x/y') - confirm.
    def rooted_path(src)
      ['', *src.split('/').drop(1)].join('/')
    end

    # Returns the links on the current page whose text matches GRADE_LINK_TEXT.
    def grade_links
      page.all('a').select { |link| link.text =~ GRADE_LINK_TEXT }
    end

    # Clicks every link, dumps the resulting page body and reads its grade
    # table.
    #
    # NOTE(review): every link after the first is clicked once the browser
    # has already left the page it was found on; confirm the driver tolerates
    # this, or re-locate the links after each navigation.
    #
    # @return [Hash{String => Hash{String => String}}] page <h3> text mapped
    #   to the cell texts read from that page's table
    def scrape_notes(links)
      links.each_with_object({}) do |link, notes|
        puts "about to click #{link}"
        link.click
        p SEPARATOR
        puts page.body
        p SEPARATOR
        title = page.find(:css, 'h3').text
        notes[title] = read_grade_rows(title)
      end
    end

    # Reads the `td.PacEstudiant` / `td.Nota` cell pair of every row of
    # `table.tablaNotas` after the first LEADING_ROWS_TO_SKIP rows, logging
    # each pair as it is added.
    def read_grade_rows(title)
      rows = page.all('table.tablaNotas tbody tr').drop(LEADING_ROWS_TO_SKIP)
      rows.each_with_object({}) do |row, grades|
        pac = row.find(:css, 'td.PacEstudiant').text
        nota = row.find(:css, 'td.Nota').text
        grades[pac] = nota
        puts "Adding #{title}[#{pac}] = #{nota}"
      end
    end
  end
end
# Run the scrape, then dump the body of the last page the session ended on.
t = MyCapybaraTest::Test.new
t.test_uoc
puts t.page.body
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment