Oyster Card backup script for ScraperWiki.com Vault
# This is a very basic script to back up Oyster card data to a ScraperWiki vault
# Notes:
# 1) You need an oyster card that has been registered on tfl.gov.uk
# 2) This script requires you to enter your username and password (think about what that means before proceeding, and do so at your own risk)
# 3) This script should be run in a PRIVATE SCRAPERWIKI VAULT ONLY (https://scraperwiki.com/pricing/), not a public scraper, or the world will know your password
import scraperwiki
import mechanize
import lxml.html
from lxml.etree import tostring
import csv
username = 'YOUR TFL USERNAME'
password = 'YOUR TFL PASSWORD'
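# A safer alternative (a sketch, not part of the original script): keep the
# credentials out of the code entirely, e.g. in environment variables. The
# names OYSTER_USERNAME and OYSTER_PASSWORD below are illustrative only.
#   import os
#   username = os.environ['OYSTER_USERNAME']
#   password = os.environ['OYSTER_PASSWORD']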
# set up the browser
br = mechanize.Browser()
#br.set_all_readonly(False) # allow everything to be written to
br.set_handle_robots(False) # no robots
br.set_handle_refresh(False) # can sometimes hang without this
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
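# note: the browser-like User-Agent above is a common mechanize workaround,
# since some sites block the library's default User-Agent (whether TfL
# actually does is an assumption here)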
response = br.open('https://oyster.tfl.gov.uk/oyster/entry.do')
# get the login form
br.form = list(br.forms())[0]
username_input = br.form.find_control("j_username")
password_input = br.form.find_control("j_password")
# enter the username and password, then submit the form
username_input.value = username
password_input.value = password
response = br.submit()
# find the journey history link
journey_history_link = None
for link in br.links():
    if link.text == 'Journey history':
        journey_history_link = link
        break
# if no history link was found, raise an exception
if journey_history_link is None:
    raise Exception('Failed to find journey history link')
# go to the journey history page
response = br.follow_link(journey_history_link)
# mechanize doesn't seem to like the HTML here, so use lxml to find the download link
root = lxml.html.fromstring(response.read())
download_link = root.cssselect("form#jhDownloadForm input")[0]
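# the input's onclick handler has the form (inferred from the replacements below):
#   document.jhDownloadForm.action="<relative path>";document.jhDownloadForm.submit();
# so stripping the surrounding JavaScript leaves just the relative download path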
download_href = download_link.attrib['onclick'].replace('document.jhDownloadForm.action="', '').replace('";document.jhDownloadForm.submit();', '')
download_href = 'https://oyster.tfl.gov.uk' + download_href
# download the csv
response = br.open(download_href)
# parse the CSV (the first row supplies the column names) and save each journey
csv_reader = csv.DictReader(response.read().splitlines())
for row in csv_reader:
    row['Journey Action'] = row['Journey/Action'] # rename this column, as the database objects to the slash
    del row['Journey/Action']
    scraperwiki.sqlite.save(unique_keys=['Start Time', 'End Time', 'Date', 'Journey Action'], data=row)
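# note: scraperwiki.sqlite.save() upserts on unique_keys, so re-running the
# script should update existing rows rather than create duplicates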