Created
January 27, 2016 13:23
-
-
Save ustroetz/b0e2a3c35fa259b07693 to your computer and use it in GitHub Desktop.
Script to download bus schedules from Wiener Linien
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from time import sleep | |
| import requests | |
| from splinter import Browser | |
| import thread | |
| import pdb | |
def download_pdf(browser, route_name, weekday, direction):
    """Save the PDF linked from the browser's current page to disk.

    The link is located by looking for an href containing
    ``FileOutputName``; its target is fetched with ``requests`` and written
    to ``<route_name>_<weekday>_<direction>.pdf`` in the working directory.

    Args:
        browser: splinter browser positioned on the page holding the link.
        route_name: route identifier used as the first filename component.
        weekday: day-type label used as the second filename component.
        direction: direction label used as the third filename component.

    Raises:
        requests.exceptions.RequestException: if the download times out or
            the server answers with an HTTP error status.
    """
    pdf_url = browser.find_link_by_partial_href('FileOutputName')['href']
    # A timeout keeps one stalled request from blocking the whole batch.
    response = requests.get(pdf_url, timeout=60)
    # Fail loudly rather than saving an HTTP error page under a .pdf name.
    response.raise_for_status()
    filename = '_'.join((route_name, weekday, direction)) + '.pdf'
    with open(filename, 'wb') as pdf_file:
        pdf_file.write(response.content)
def generate_pdf(browser, route_name, weekday, direction,
                 poll_interval=10, max_polls=180):
    """Drive the timetable search form and wait for the PDF to be generated.

    Visits the search page, looks up ``route_name``, applies the day-type
    and direction checkboxes, starts the generation and then polls the page
    text until it reports completion or failure.

    Args:
        browser: splinter browser used to drive the site.
        route_name: value entered into the ``trainname`` form field.
        weekday: ``'workdays'``, ``'saturday'`` or ``'sunday'``; any other
            value leaves the day checkboxes untouched.
        direction: ``'forward'`` or ``'backward'``; any other value leaves
            the direction checkboxes untouched.
        poll_interval: seconds to sleep before each status check.
        max_polls: maximum number of status checks before giving up, so an
            unexpected page state cannot stall the run forever.

    Returns:
        True if the page reports ``'ist fertiggestellt'``; False if the
        query could not be computed, the result page is empty, no
        connections exist, or the polling limit is reached.
    """
    url = "http://routenplaner.vor.at/bin/tb/trainsearch.exe/dn?&L=vs_p2w&lfp=1&"
    browser.visit(url)
    browser.fill_form({'trainname': route_name})
    browser.find_by_name('HWAI=QUERY!advancedSearch=yes!&dummy').click()

    # NOTE(review): the validity window is hard-coded to one week in
    # February 2016 -- confirm before reusing the script for other periods.
    field_values = {'valueDateBegin': 'Mo, 01.02.2016', 'valueDateEnd': 'Mo, 08.02.2016'}
    browser.fill_form(field_values)

    # Tick exactly the requested day type and clear the other two.
    day_boxes = (
        ('workdays', 'checkWorkdays'),
        ('saturday', 'checkSaturday'),
        ('sunday', 'checkSunday'),
    )
    if weekday in dict(day_boxes):
        for day, box_name in day_boxes:
            if day == weekday:
                browser.check(box_name)
            else:
                browser.uncheck(box_name)
    browser.find_by_name('sq').click()

    if browser.is_text_present('Ihre Abfrage kann zurzeit leider nicht berechnet werden.'):
        print('Ihre Abfrage kann zurzeit leider nicht berechnet werden.')
        return False
    if not browser.is_text_present('Linienfahrplan'):
        print('Empty page')
        return False

    flag_0 = 'Trainresult_GroupedAlternative_Category_7_elem_0_DirFlag_0'
    flag_1 = 'Trainresult_GroupedAlternative_Category_7_elem_0_DirFlag_1'
    if direction == 'forward':
        browser.find_by_id(flag_0).uncheck()
        browser.find_by_id(flag_1).check()
    elif direction == 'backward':
        browser.find_by_id(flag_0).check()
        browser.find_by_id(flag_1).uncheck()
    browser.find_by_name('start').click()

    # Poll a bounded number of times; the success text takes precedence
    # over the "no connections" text when both are present.
    for _ in range(max_polls):
        sleep(poll_interval)
        if browser.is_text_present('ist fertiggestellt'):
            return True
        if browser.is_text_present('nach gibt es leider keine Reiseverbindungen'):
            return False
    print('Timed out waiting for PDF generation')
    return False
def get_pdf_per_route(browser, route_name, didntwork):
    """Fetch every day-type/direction timetable PDF for one route.

    For each of the three day types and both directions the PDF is
    generated and, on success, downloaded. Any combination that fails
    (generation reports failure, or either step raises) is recorded and
    printed.

    Args:
        browser: splinter browser passed through to the helper functions.
        route_name: route identifier to look up.
        didntwork: list mutated in place; ``<route_name>_<weekday>`` is
            appended once per failed direction (the direction itself is not
            part of the recorded name).
    """
    for weekday in ['workdays', 'saturday', 'sunday']:
        pdf_name = route_name + '_' + weekday
        for direction in ('forward', 'backward'):
            try:
                succeeded = generate_pdf(browser, route_name, weekday, direction)
                if succeeded:
                    download_pdf(browser, route_name, weekday, direction)
            except Exception:
                # Best-effort batch: note the failure and carry on, but let
                # KeyboardInterrupt/SystemExit propagate so the run can be
                # stopped.
                succeeded = False
            if not succeeded:
                didntwork.append(pdf_name)
                print(pdf_name)
# Download the timetables for routes 79a through 110a with one shared
# browser session, then report every combination that failed.
browser = Browser()
didntwork = []
try:
    for i in range(79, 111):
        route_name = str(i) + 'a'
        get_pdf_per_route(browser, route_name, didntwork)
finally:
    # Always shut the browser down so no browser process is left behind,
    # even when the run is interrupted or crashes.
    browser.quit()
print(didntwork)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment