Skip to content

Instantly share code, notes, and snippets.

@ustroetz
Created January 27, 2016 13:23
Show Gist options
  • Select an option

  • Save ustroetz/b0e2a3c35fa259b07693 to your computer and use it in GitHub Desktop.

Select an option

Save ustroetz/b0e2a3c35fa259b07693 to your computer and use it in GitHub Desktop.
Script to download bus schedule PDFs for Wiener Linien routes via the VOR route planner (routenplaner.vor.at)
from time import sleep
import requests
from splinter import Browser
import thread
import pdb
def download_pdf(browser, route_name, weekday, direction):
    """Save the PDF linked from the browser's current page to the working directory.

    The PDF URL is taken from the link on the current page whose ``href``
    contains ``FileOutputName``. The file is written as
    ``<route_name>_<weekday>_<direction>.pdf``.

    Args:
        browser: splinter browser positioned on the page that links the PDF.
        route_name: Route identifier used in the file name (e.g. ``'79a'``).
        weekday: Day-type label used in the file name.
        direction: Direction label used in the file name.

    Raises:
        requests.exceptions.RequestException: if the download fails, times
            out, or the server answers with an HTTP error status.
    """
    pdf_url = browser.find_link_by_partial_href('FileOutputName')['href']
    # A timeout keeps one stalled download from blocking the whole run.
    response = requests.get(pdf_url, timeout=60)
    # Fail loudly on 4xx/5xx instead of saving an error page as a ".pdf".
    response.raise_for_status()
    filename = route_name + '_' + weekday + '_' + direction + '.pdf'
    with open(filename, 'wb') as f:
        f.write(response.content)
def generate_pdf(browser, route_name, weekday, direction,
                 date_begin='Mo, 01.02.2016', date_end='Mo, 08.02.2016',
                 poll_interval=10, max_polls=60):
    """Drive the route-planner web form until a timetable PDF is ready.

    Searches for ``route_name``, restricts the query to the given date range
    and day type, selects the direction, starts PDF generation and then polls
    the page until it reports a result.

    Args:
        browser: splinter browser used to drive the site.
        route_name: Line name typed into the ``trainname`` field.
        weekday: ``'workdays'``, ``'saturday'`` or ``'sunday'``; any other
            value leaves the day-type checkboxes as the page delivered them.
        direction: ``'forward'`` or ``'backward'``; any other value leaves the
            direction checkboxes untouched.
        date_begin: Value for the ``valueDateBegin`` form field.
        date_end: Value for the ``valueDateEnd`` form field.
        poll_interval: Seconds to sleep before each status check.
        max_polls: Maximum number of status checks before giving up, so a
            page that never shows either status text cannot hang the script.

    Returns:
        True if the page shows ``'ist fertiggestellt'``; False if the site
        reports an error, the result page is empty, the site reports that no
        connections exist, or polling runs out.
    """
    url = "http://routenplaner.vor.at/bin/tb/trainsearch.exe/dn?&L=vs_p2w&lfp=1&"
    browser.visit(url)
    browser.fill_form({'trainname': route_name})
    browser.find_by_name('HWAI=QUERY!advancedSearch=yes!&dummy').click()

    browser.fill_form({'valueDateBegin': date_begin, 'valueDateEnd': date_end})

    # Exactly one day-type box is ticked; the boxes are always touched in
    # this fixed order. Unknown weekday values change nothing.
    day_checkboxes = (
        ('workdays', 'checkWorkdays'),
        ('saturday', 'checkSaturday'),
        ('sunday', 'checkSunday'),
    )
    if any(weekday == day for day, _ in day_checkboxes):
        for day, checkbox in day_checkboxes:
            if day == weekday:
                browser.check(checkbox)
            else:
                browser.uncheck(checkbox)

    browser.find_by_name('sq').click()

    if browser.is_text_present('Ihre Abfrage kann zurzeit leider nicht berechnet werden.'):
        print('Ihre Abfrage kann zurzeit leider nicht berechnet werden.')
        return False
    if not browser.is_text_present('Linienfahrplan'):
        print('Empty page')
        return False

    dir_flag_0 = 'Trainresult_GroupedAlternative_Category_7_elem_0_DirFlag_0'
    dir_flag_1 = 'Trainresult_GroupedAlternative_Category_7_elem_0_DirFlag_1'
    if direction == 'forward':
        browser.find_by_id(dir_flag_0).uncheck()
        browser.find_by_id(dir_flag_1).check()
    elif direction == 'backward':
        browser.find_by_id(dir_flag_0).check()
        browser.find_by_id(dir_flag_1).uncheck()

    browser.find_by_name('start').click()

    # Poll for one of the two terminal messages. The loop is bounded so that
    # an unexpected page cannot stall the run forever.
    for _ in range(max_polls):
        sleep(poll_interval)
        if browser.is_text_present('ist fertiggestellt'):
            return True
        if browser.is_text_present('nach gibt es leider keine Reiseverbindungen'):
            return False

    print('Timed out waiting for PDF generation')
    return False
def get_pdf_per_route(browser, route_name, didntwork):
    """Fetch every timetable PDF (3 day types x 2 directions) for one route.

    For each combination the PDF is generated and, if generation succeeded,
    downloaded. Each failed combination appends ``<route_name>_<weekday>`` to
    ``didntwork`` and prints that name; the run then continues with the next
    combination.

    Args:
        browser: splinter browser shared across all requests.
        route_name: Route identifier, e.g. ``'79a'``.
        didntwork: List that is mutated in place to collect failed entries.
    """
    for weekday in ['workdays', 'saturday', 'sunday']:
        pdf_name = route_name + '_' + weekday
        for direction in ('forward', 'backward'):
            try:
                if generate_pdf(browser, route_name, weekday, direction):
                    download_pdf(browser, route_name, weekday, direction)
                    continue
            except Exception as exc:
                # Catch Exception rather than everything so Ctrl-C and
                # SystemExit still stop the script, and print the reason
                # instead of discarding it.
                print('%s %s failed: %r' % (pdf_name, direction, exc))
            didntwork.append(pdf_name)
            print(pdf_name)
# Download the timetables for routes "79a" through "110a" and report which
# route/day-type combinations could not be fetched.
browser = Browser()
didntwork = []
try:
    for i in range(79, 111):
        route_name = str(i) + 'a'
        get_pdf_per_route(browser, route_name, didntwork)
finally:
    # Close the browser even if the run is interrupted or crashes, so no
    # browser process is left behind.
    browser.quit()
print(didntwork)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment