Oyster Card backup script for ScraperWiki.com Vault
# This is a very basic script to back up Oyster card data to a ScraperWiki vault
# Notes:
# 1) You need an Oyster card that has been registered on tfl.gov.uk
# 2) This script requires you to enter your username and password (think about what that means before progressing, and do so at your own risk)
# 3) This script should be run in a PRIVATE SCRAPERWIKI VAULT ONLY (https://scraperwiki.com/pricing/), not in a public scraper, or the world will know your password
import scraperwiki
import mechanize
import lxml.html
import csv

username = 'YOUR TFL USERNAME'
password = 'YOUR TFL PASSWORD'

# set up the browser
br = mechanize.Browser()
#br.set_all_readonly(False) # allow everything to be written to
br.set_handle_robots(False) # ignore robots.txt
br.set_handle_refresh(False) # can sometimes hang without this
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
response = br.open('https://oyster.tfl.gov.uk/oyster/entry.do')

# get the login form
br.form = list(br.forms())[0]
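# (Assumption: the login form is the first form on the entry page, and
# j_username / j_password are the stock Java servlet security field names.)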
username_input = br.form.find_control("j_username")
password_input = br.form.find_control("j_password")

# enter the credentials and submit
username_input.value = username
password_input.value = password
response = br.submit()
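
# Optional guard, left commented out because it rests on an assumption:
# if the login fails, TfL redisplays the sign-in form, so the j_username
# field would still be present in the response. Uncomment to fail fast:
#if 'j_username' in response.read():
#    raise Exception('Login appears to have failed - check your credentials')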

# find the journey history link
journey_history_link = None
for link in br.links():
    if link.text == 'Journey history':
        journey_history_link = link
        break

# if there is no history link, raise an exception
if journey_history_link is None:
    raise Exception('Failed to find journey history link')

# go to the journey history page
response = br.follow_link(journey_history_link)

# mechanize doesn't seem to like the html here, so use lxml to find the download link
root = lxml.html.fromstring(response.read())
download_link = root.cssselect("form#jhDownloadForm input")[0]
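# The input's onclick handler has the form:
#   document.jhDownloadForm.action="<relative csv url>";document.jhDownloadForm.submit();
# so stripping the surrounding JavaScript leaves just the relative URL.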
download_href = download_link.attrib['onclick'].replace('document.jhDownloadForm.action="', '').replace('";document.jhDownloadForm.submit();', '')
download_href = 'https://oyster.tfl.gov.uk' + download_href

# download the csv
response = br.open(download_href)

# read and save
csv_reader = csv.DictReader(response.read().splitlines())
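# Each CSV row has Date, Start Time, End Time and Journey/Action columns;
# together they form the unique key for each saved record.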
for row in csv_reader:
    row['Journey Action'] = row['Journey/Action'] # rename this column, as the database objects to the slash
    del row['Journey/Action']
    scraperwiki.sqlite.save(unique_keys=['Start Time', 'End Time', 'Date', 'Journey Action'], data=row)
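
# Optional sanity check: a minimal sketch, assuming the ScraperWiki Classic
# scraperwiki.sqlite.select() helper and its default 'swdata' table name.
saved = scraperwiki.sqlite.select("count(*) as journeys from swdata")
print "Backed up %d journeys" % saved[0]['journeys']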