Skip to content

Instantly share code, notes, and snippets.

@quandyfactory
Last active June 26, 2018 12:15
Show Gist options
  • Save quandyfactory/e12b697615d2356d77759881cc3131b0 to your computer and use it in GitHub Desktop.
Save quandyfactory/e12b697615d2356d77759881cc3131b0 to your computer and use it in GitHub Desktop.
# -*- coding: utf8 -*-
from HTMLParser import HTMLParser
import os
import requests
class MLStripper(HTMLParser):
    """HTML parser that throws away the markup and keeps only the text.

    Feed it HTML with ``feed()`` and read the accumulated text back with
    ``get_data()``.
    """

    def __init__(self):
        # Run the base-class initialiser (which itself calls reset()) rather
        # than calling reset() directly: HTMLParser.__init__ sets up parser
        # state (e.g. ``convert_charrefs`` on Python 3) that feed() relies on.
        # An explicit base call is used instead of super() because
        # HTMLParser is an old-style class on Python 2.
        HTMLParser.__init__(self)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Record one run of text found between tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far, joined in document order."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of ``html`` with every tag removed.

    The markup is run through a fresh MLStripper, so nothing is shared
    between calls.
    """
    stripper = MLStripper()
    stripper.feed(html)
    plain_text = stripper.get_data()
    return plain_text
# Local directory the riding result pages are saved to and read back from.
# '~' must be expanded explicitly: open() and os.listdir() treat it as a
# literal directory name, not as the user's home directory.
path = os.path.expanduser(r'~\Desktop\election_results')
# Site root that the relative riding URLs are appended to.
base_url = 'https://www.elections.on.ca'
# Tuple of (relative results-page URL, 'English name / French name') pairs,
# one entry per riding. The URL is appended to base_url when downloading, and
# the part of the name before ' / ' is used as the local file name.
# I extracted them from the select options in the main results webpage.
ridings = (
('/content/ngw/en/election-results/ajax.html', 'Ajax / Ajax'),
('/content/ngw/en/election-results/algoma-manitoulin.html', 'Algoma—Manitoulin / Algoma—Manitoulin'),
('/content/ngw/en/election-results/aurora-oak-ridges-richmond-hill.html', 'Aurora—Oak Ridges—Richmond Hill / Aurora—Oak Ridges—Richmond Hill'),
('/content/ngw/en/election-results/barrie-innisfil.html', 'Barrie—Innisfil / Barrie—Innisfil'),
('/content/ngw/en/election-results/barrie-springwater-oro-medonte.html', 'Barrie—Springwater—Oro-Medonte / Barrie—Springwater—Oro-Medonte'),
('/content/ngw/en/election-results/bay-of-quinte.html', 'Bay of Quinte / Baie de Quinte'),
('/content/ngw/en/election-results/beaches-east-york.html', 'Beaches—East York / Beaches—East York'),
('/content/ngw/en/election-results/brampton-centre.html', 'Brampton Centre / Brampton-Centre'),
('/content/ngw/en/election-results/brampton-east.html', 'Brampton East / Brampton-Est'),
('/content/ngw/en/election-results/brampton-north.html', 'Brampton North / Brampton-Nord'),
('/content/ngw/en/election-results/brampton-south.html', 'Brampton South / Brampton-Sud'),
('/content/ngw/en/election-results/brampton-west.html', 'Brampton West / Brampton-Ouest'),
('/content/ngw/en/election-results/brantford-brant.html', 'Brantford—Brant / Brantford—Brant'),
('/content/ngw/en/election-results/bruce-grey-owen-sound.html', 'Bruce—Grey—Owen Sound / Bruce—Grey—Owen Sound'),
('/content/ngw/en/election-results/burlington.html', 'Burlington / Burlington'),
('/content/ngw/en/election-results/cambridge.html', 'Cambridge / Cambridge'),
('/content/ngw/en/election-results/carleton.html', 'Carleton / Carleton'),
('/content/ngw/en/election-results/chatham-kent-leamington.html', 'Chatham-Kent—Leamington / Chatham-Kent—Leamington'),
('/content/ngw/en/election-results/davenport.html', 'Davenport / Davenport'),
('/content/ngw/en/election-results/don-valley-east.html', 'Don Valley East / Don Valley-Est'),
('/content/ngw/en/election-results/don-valley-north.html', 'Don Valley North / Don Valley-Nord'),
('/content/ngw/en/election-results/don-valley-west.html', 'Don Valley West / Don Valley-Ouest'),
('/content/ngw/en/election-results/dufferin-caledon.html', 'Dufferin—Caledon / Dufferin—Caledon'),
('/content/ngw/en/election-results/durham.html', 'Durham / Durham'),
('/content/ngw/en/election-results/eglinton-lawrence.html', 'Eglinton—Lawrence / Eglinton—Lawrence'),
('/content/ngw/en/election-results/elgin-middlesex-london.html', 'Elgin—Middlesex—London / Elgin—Middlesex—London'),
('/content/ngw/en/election-results/essex.html', 'Essex / Essex'),
('/content/ngw/en/election-results/etobicoke-centre.html', 'Etobicoke Centre / Etobicoke-Centre'),
('/content/ngw/en/election-results/etobicoke-lakeshore.html', 'Etobicoke—Lakeshore / Etobicoke—Lakeshore'),
('/content/ngw/en/election-results/etobicoke-north.html', 'Etobicoke North / Etobicoke-Nord'),
('/content/ngw/en/election-results/flamborough-glanbrook.html', 'Flamborough—Glanbrook / Flamborough—Glanbrook'),
('/content/ngw/en/election-results/glengarry-prescott-russell.html', 'Glengarry—Prescott—Russell / Glengarry—Prescott—Russell'),
('/content/ngw/en/election-results/guelph.html', 'Guelph / Guelph'),
('/content/ngw/en/election-results/haldimand-norfolk.html', 'Haldimand—Norfolk / Haldimand—Norfolk'),
('/content/ngw/en/election-results/haliburton-kawartha-lakes-brock.html', 'Haliburton—Kawartha Lakes—Brock / Haliburton—Kawartha Lakes—Brock'),
('/content/ngw/en/election-results/hamilton-centre.html', 'Hamilton Centre / Hamilton-Centre'),
('/content/ngw/en/election-results/hamilton-east-stoney-creek.html', 'Hamilton East—Stoney Creek / Hamilton-Est—Stoney Creek'),
('/content/ngw/en/election-results/hamilton-mountain.html', 'Hamilton Mountain / Hamilton Mountain'),
('/content/ngw/en/election-results/hamilton-west-ancaster-dundas.html', 'Hamilton West—Ancaster—Dundas / Hamilton-Ouest—Ancaster—Dundas'),
('/content/ngw/en/election-results/hastings-lennox-and-addington.html', 'Hastings—Lennox and Addington / Hastings—Lennox and Addington'),
('/content/ngw/en/election-results/humber-river-black-creek.html', 'Humber River—Black Creek / Humber River—Black Creek'),
('/content/ngw/en/election-results/huron-bruce.html', 'Huron—Bruce / Huron—Bruce'),
('/content/ngw/en/election-results/kanata-carleton.html', 'Kanata—Carleton / Kanata—Carleton'),
('/content/ngw/en/election-results/kenora-rainy-river.html', 'Kenora—Rainy River / Kenora—Rainy River'),
('/content/ngw/en/election-results/king-vaughan.html', 'King—Vaughan / King—Vaughan'),
('/content/ngw/en/election-results/kingston-and-the-islands.html', 'Kingston and the Islands / Kingston et les Îles'),
('/content/ngw/en/election-results/kitchener-centre.html', 'Kitchener Centre / Kitchener-Centre'),
('/content/ngw/en/election-results/kitchener-conestoga.html', 'Kitchener—Conestoga / Kitchener—Conestoga'),
('/content/ngw/en/election-results/kitchener-south-hespeler.html', 'Kitchener South—Hespeler / Kitchener-Sud—Hespeler'),
('/content/ngw/en/election-results/lambton-kent-middlesex.html', 'Lambton—Kent—Middlesex / Lambton—Kent—Middlesex'),
('/content/ngw/en/election-results/lanark-frontenac-kingston.html', 'Lanark—Frontenac—Kingston / Lanark—Frontenac—Kingston'),
('/content/ngw/en/election-results/leeds-grenville-thousand-islands-and-rideau-lakes.html', 'Leeds—Grenville—Thousand Islands and Rideau Lakes / Leeds—Grenville—Thousand Islands et Rideau Lakes'),
('/content/ngw/en/election-results/london-fanshawe.html', 'London—Fanshawe / London—Fanshawe'),
('/content/ngw/en/election-results/london-north-centre.html', 'London North Centre / London-Centre-Nord'),
('/content/ngw/en/election-results/london-west.html', 'London West / London-Ouest'),
('/content/ngw/en/election-results/markham-stouffville.html', 'Markham—Stouffville / Markham—Stouffville'),
('/content/ngw/en/election-results/markham-thornhill.html', 'Markham—Thornhill / Markham—Thornhill'),
('/content/ngw/en/election-results/markham-unionville.html', 'Markham—Unionville / Markham—Unionville'),
('/content/ngw/en/election-results/milton.html', 'Milton / Milton'),
('/content/ngw/en/election-results/mississauga-centre.html', 'Mississauga Centre / Mississauga-Centre'),
('/content/ngw/en/election-results/mississauga-east-cooksville.html', 'Mississauga East—Cooksville / Mississauga-Est—Cooksville'),
('/content/ngw/en/election-results/mississauga-erin-mills.html', 'Mississauga—Erin Mills / Mississauga—Erin Mills'),
('/content/ngw/en/election-results/mississauga-lakeshore.html', 'Mississauga—Lakeshore / Mississauga—Lakeshore'),
('/content/ngw/en/election-results/mississauga-malton.html', 'Mississauga—Malton / Mississauga—Malton'),
('/content/ngw/en/election-results/mississauga-streetsville.html', 'Mississauga—Streetsville / Mississauga—Streetsville'),
('/content/ngw/en/election-results/nepean.html', 'Nepean / Nepean'),
('/content/ngw/en/election-results/newmarket-aurora.html', 'Newmarket—Aurora / Newmarket—Aurora'),
('/content/ngw/en/election-results/niagara-centre.html', 'Niagara Centre / Niagara-Centre'),
('/content/ngw/en/election-results/niagara-falls.html', 'Niagara Falls / Niagara Falls'),
('/content/ngw/en/election-results/niagara-west.html', 'Niagara West / Niagara-Ouest'),
('/content/ngw/en/election-results/nickel-belt.html', 'Nickel Belt / Nickel Belt'),
('/content/ngw/en/election-results/nipissing.html', 'Nipissing / Nipissing'),
('/content/ngw/en/election-results/northumberland-peterborough-south.html', 'Northumberland—Peterborough South / Northumberland—Peterborough-Sud'),
('/content/ngw/en/election-results/oakville.html', 'Oakville / Oakville'),
('/content/ngw/en/election-results/oakville-north-burlington.html', 'Oakville North—Burlington / Oakville-Nord—Burlington'),
('/content/ngw/en/election-results/orl-ans.html', 'Orléans / Orléans'),
('/content/ngw/en/election-results/oshawa.html', 'Oshawa / Oshawa'),
('/content/ngw/en/election-results/ottawa-centre.html', 'Ottawa Centre / Ottawa-Centre'),
('/content/ngw/en/election-results/ottawa-south.html', 'Ottawa South / Ottawa-Sud'),
('/content/ngw/en/election-results/ottawa-vanier.html', 'Ottawa—Vanier / Ottawa—Vanier'),
('/content/ngw/en/election-results/ottawa-west-nepean.html', 'Ottawa West—Nepean / Ottawa-Ouest—Nepean'),
('/content/ngw/en/election-results/oxford.html', 'Oxford / Oxford'),
('/content/ngw/en/election-results/parkdale-high-park.html', 'Parkdale—High Park / Parkdale—High Park'),
('/content/ngw/en/election-results/parry-sound-muskoka.html', 'Parry Sound—Muskoka / Parry Sound—Muskoka'),
('/content/ngw/en/election-results/perth-wellington.html', 'Perth—Wellington / Perth—Wellington'),
('/content/ngw/en/election-results/peterborough-kawartha.html', 'Peterborough—Kawartha / Peterborough—Kawartha'),
('/content/ngw/en/election-results/pickering-uxbridge.html', 'Pickering—Uxbridge / Pickering—Uxbridge'),
('/content/ngw/en/election-results/renfrew-nipissing-pembroke.html', 'Renfrew—Nipissing—Pembroke / Renfrew—Nipissing—Pembroke'),
('/content/ngw/en/election-results/richmond-hill.html', 'Richmond Hill / Richmond Hill'),
('/content/ngw/en/election-results/st--catharines.html', 'St. Catharines / St. Catharines'),
('/content/ngw/en/election-results/sarnia-lambton.html', 'Sarnia—Lambton / Sarnia—Lambton'),
('/content/ngw/en/election-results/sault-ste--marie.html', 'Sault Ste. Marie / Sault Ste. Marie'),
('/content/ngw/en/election-results/scarborough-agincourt.html', 'Scarborough—Agincourt / Scarborough—Agincourt'),
('/content/ngw/en/election-results/scarborough-centre.html', 'Scarborough Centre / Scarborough-Centre'),
('/content/ngw/en/election-results/scarborough-guildwood.html', 'Scarborough—Guildwood / Scarborough—Guildwood'),
('/content/ngw/en/election-results/scarborough-north.html', 'Scarborough North / Scarborough-Nord'),
('/content/ngw/en/election-results/scarborough-rouge-park.html', 'Scarborough—Rouge Park / Scarborough—Rouge Park'),
('/content/ngw/en/election-results/scarborough-southwest.html', 'Scarborough Southwest / Scarborough-Sud-Ouest'),
('/content/ngw/en/election-results/simcoe-grey.html', 'Simcoe—Grey / Simcoe—Grey'),
('/content/ngw/en/election-results/simcoe-north.html', 'Simcoe North / Simcoe-Nord'),
('/content/ngw/en/election-results/spadina-fort-york.html', 'Spadina—Fort York / Spadina—Fort York'),
('/content/ngw/en/election-results/stormont-dundas-south-glengarry.html', 'Stormont—Dundas—South Glengarry / Stormont—Dundas—Sud-Glengarry'),
('/content/ngw/en/election-results/sudbury.html', 'Sudbury / Sudbury'),
('/content/ngw/en/election-results/thornhill.html', 'Thornhill / Thornhill'),
('/content/ngw/en/election-results/thunder-bay-atikokan.html', 'Thunder Bay—Atikokan / Thunder Bay—Atikokan'),
('/content/ngw/en/election-results/thunder-bay-superior-north.html', 'Thunder Bay—Superior North / Thunder Bay—Supérieur-Nord'),
('/content/ngw/en/election-results/timiskaming-cochrane.html', 'Timiskaming—Cochrane / Timiskaming—Cochrane'),
('/content/ngw/en/election-results/timmins.html', 'Timmins / Timmins'),
('/content/ngw/en/election-results/toronto-centre.html', 'Toronto Centre / Toronto-Centre'),
('/content/ngw/en/election-results/toronto-danforth.html', 'Toronto—Danforth / Toronto—Danforth'),
('/content/ngw/en/election-results/toronto-st--paul-s.html', 'Toronto—St. Paul\'s / Toronto—St. Paul\'s'),
('/content/ngw/en/election-results/university-rosedale.html', 'University—Rosedale / University—Rosedale'),
('/content/ngw/en/election-results/vaughan-woodbridge.html', 'Vaughan—Woodbridge / Vaughan—Woodbridge'),
('/content/ngw/en/election-results/waterloo.html', 'Waterloo / Waterloo'),
('/content/ngw/en/election-results/wellington-halton-hills.html', 'Wellington—Halton Hills / Wellington—Halton Hills'),
('/content/ngw/en/election-results/whitby.html', 'Whitby / Whitby'),
('/content/ngw/en/election-results/willowdale.html', 'Willowdale / Willowdale'),
('/content/ngw/en/election-results/windsor-tecumseh.html', 'Windsor—Tecumseh / Windsor—Tecumseh'),
('/content/ngw/en/election-results/windsor-west.html', 'Windsor West / Windsor-Ouest'),
('/content/ngw/en/election-results/york-centre.html', 'York Centre / York-Centre'),
('/content/ngw/en/election-results/york-simcoe.html', 'York—Simcoe / York—Simcoe'),
('/content/ngw/en/election-results/york-south-weston.html', 'York South—Weston / York-Sud—Weston'),
('/content/ngw/en/election-results/kiiwetinoong.html', 'Kiiwetinoong / Kiiwetinoong'),
('/content/ngw/en/election-results/mushkegowuk-james-bay.html', 'Mushkegowuk—James Bay / Mushkegowuk—Baie James'),
)
def get_riding_files():
    """Download each riding's results page and save it into ``path``.

    Each page is stored as ``<English riding name>.html``, with the em
    dashes in the name replaced by plain hyphens. The output directory is
    created if it does not exist yet.

    Raises:
        requests.HTTPError: if a results page answers with an HTTP error
            status, so an error page is never saved as if it were results.
    """
    import io

    # ``proxyDict`` is optional configuration that this file does not define.
    # Use it when a module-level ``proxyDict`` exists; otherwise pass None so
    # requests falls back to its default proxy handling instead of the call
    # failing with a NameError.
    proxies = globals().get('proxyDict')

    if not os.path.isdir(path):
        os.makedirs(path)

    for url_path, names in ridings:
        url = '%s%s' % (base_url, url_path)
        # Keep only the English half of 'English / French' for the file name.
        english_name = names.replace('—', '-').split(' / ')[0]
        filename = os.path.join(path, '%s.html' % english_name)
        response = requests.get(url, proxies=proxies)
        response.raise_for_status()
        # io.open handles the UTF-8 encoding, so the decoded response text
        # can be written as-is.
        with io.open(filename, 'w', encoding='utf8') as myfile:
            myfile.write(response.text)
def clean_riding_files():
    """Reduce each saved results page in ``path`` to just its results table.

    Every ``.html`` file is rewritten in place so that it contains only the
    results table, wrapped in a plain ``<table>`` element, with tabs and
    ``<em>`` tags removed and the language-divider span replaced by ' / '.

    Files that do not contain the results-table marker are left untouched.
    That covers files already cleaned by an earlier run, so calling this
    function twice is safe.

    If I had more self-esteem, I'd have used a regex but this is quick and
    dirty and it works.
    """
    import io

    marker1 = '<table class="table" align="center">'
    marker2 = '</table>'
    for f in os.listdir(path):
        if not f.endswith('.html'):
            continue
        filename = os.path.join(path, f)
        with io.open(filename, 'r', encoding='utf8') as myfile:
            text = myfile.read()
        if marker1 not in text:
            # Nothing to extract (e.g. the file was cleaned previously).
            continue
        # Keep what lies between the opening marker and the first closing tag.
        table_body = text.split(marker1, 1)[1].split(marker2, 1)[0]
        text = '<table>\n%s\n</table>' % table_body
        text = text.replace('\t', '')
        text = text.replace('<span class="eresults-language-divider"> /</span>', ' / ')
        text = text.replace('<em>', '')
        text = text.replace('</em>', '')
        with io.open(filename, 'w', encoding='utf8') as writefile:
            writefile.write(text)
def make_sql():
    """Print a MySQL insert statement built from the cleaned riding files.

    Every ``.html`` file in ``path`` supplies one riding; each table row in
    its ``<tbody>`` becomes one ``(riding, candidate, party, votes,
    percentage)`` tuple. The riding name is printed as each file is
    processed, and the finished statement is printed at the end.

    Thousands separators are removed from the vote count and the percent
    sign from the percentage. All values are escaped as MySQL string
    literals, so names containing an apostrophe (e.g. "St. Paul's") do not
    break the statement. Files without a ``<tbody>`` and rows with fewer
    than four cells are skipped.
    """
    import io

    def quote(value):
        # MySQL string literal: escape backslashes first, then double any
        # single quotes.
        return "'%s'" % value.replace('\\', '\\\\').replace("'", "''")

    query = "insert into ontario_election_2018_results (riding, candidate, party, votes, percentage) values "
    query_values = []
    for f in os.listdir(path):
        if not f.endswith('.html'):
            continue
        riding = f.replace('.html', '')
        print(riding)
        if isinstance(riding, bytes):
            # Python 2 returns byte-string file names; decode so the name
            # can be combined with the decoded file contents below.
            riding = riding.decode('utf8')
        with io.open(os.path.join(path, f), 'r', encoding='utf8') as myfile:
            text = myfile.read()
        if '<tbody>' not in text:
            continue
        body = text.split('<tbody>', 1)[1]  # just get the table body
        # Everything before the first '<tr' is not part of any row.
        for row_html in body.split('<tr')[1:]:
            row = strip_tags('<tr' + row_html)
            row = row.replace('\n', '\t')  # cells are separated by newlines
            while '\t\t' in row:
                row = row.replace('\t\t', '\t')  # collapse duplicate tabs
            cols = [col.strip() for col in row.strip().split('\t')]
            if len(cols) < 4:
                continue
            candidate = cols[0]
            party = cols[1].split(' / ')[0]
            votes = cols[2].replace(',', '')  # strip out comma from numbers
            percentage = cols[3].replace('%', '')  # strip out percent symbol
            values = (riding, candidate, party, votes, percentage)
            query_values.append('(%s)' % ', '.join(quote(v) for v in values))
    query_values_string = ', \n'.join(query_values)
    output = '%s\n%s' % (query, query_values_string)
    if str is bytes:
        # Python 2: print UTF-8 bytes so non-ASCII names do not depend on
        # the console encoding.
        output = output.encode('utf8')
    print(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment