Last active
January 7, 2016 20:07
-
-
Save aheadley/d613e645f3715c64be5b to your computer and use it in GitHub Desktop.
Tiny web app that converts the AGDQ 2016 schedule page to CSV for importing into Google Calendar
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python2 | |
# -*- encoding: utf-8 -*- | |
import csv | |
import datetime | |
import pytz | |
import flask | |
import bs4 | |
import requests | |
# Page whose run table is scraped for the schedule.
AGDQ_SCHEDULE_URL = 'https://gamesdonequick.com/schedule'

# strptime format used to parse each row's start-time cell.
TIMESTAMP_FMT = '%Y-%m-%dT%H:%M:%SZ'

# strftime formats for the date and time columns of the exported CSV.
CSV_DATE_FMT = '%m/%d/%Y'
CSV_TIME_FMT = '%I:%M %p'

# Header row of the exported CSV, in output column order.
CSV_KEYS = [
    'Subject',
    'Start Date',
    'Start Time',
    'End Date',
    'End Time',
    'All Day Event',
    'Description',
]
app = flask.Flask(__name__)


@app.route('/')
def export_schedule_csv():
    """Serve the AGDQ schedule as a CSV document.

    Downloads the schedule page, parses its run table and hands the
    generated CSV lines to a ``text/csv`` response.
    """
    page_html = get_stream_url(AGDQ_SCHEDULE_URL)
    csv_lines = generate_schedule_csv(parse_schedule_page(page_html))
    return flask.Response(csv_lines, mimetype='text/csv')
def get_stream_url(url, timeout=30):
    """Download *url* with an HTTP GET and return the response body.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled server would block the caller forever.

    Returns:
        The raw response body (``resp.content``).

    Raises:
        requests.exceptions.RequestException: If the connection fails,
            the request times out, or the server answers with a 4xx/5xx
            status.
    """
    # The whole body is read right away, so streaming the response buys
    # nothing; a plain request also releases the connection promptly.
    resp = requests.get(url, timeout=timeout)
    # Fail loudly here rather than passing an error page on to the parser.
    resp.raise_for_status()
    return resp.content
def parse_schedule_page(page_content):
    """Turn the schedule page's run table into a list of dicts.

    The table is taken to be the parent of the element whose id is
    ``runTable``. Dict keys come from the header row's cells (lower-cased,
    spaces replaced by underscores); values are the UTF-8 encoded text of
    the matching cell in each body row. A body row with fewer cells than
    the header simply lacks the trailing keys.

    Args:
        page_content: HTML of the schedule page.

    Returns:
        One dict per direct ``tr`` child of the table body.
    """
    def direct_cells(row):
        # Only immediate <td> children, not cells of nested tables.
        return row.find_all(name='td', recursive=False)

    soup = bs4.BeautifulSoup(page_content, 'html.parser')
    table = soup.find(id='runTable').parent

    keys = []
    for header_cell in direct_cells(table.thead.tr):
        keys.append(header_cell.text.lower().replace(' ', '_'))

    table_data = []
    for body_row in table.tbody.find_all(name='tr', recursive=False):
        values = [cell.text.encode('utf-8') for cell in direct_cells(body_row)]
        table_data.append(dict(zip(keys, values)))
    return table_data
def generate_schedule_csv(table_data, local_timezone='America/Detroit'):
    """Yield the schedule as CSV text, one line per item.

    The first item is the unquoted header built from ``CSV_KEYS``; each
    following item is one event row with every field double-quoted.

    Args:
        table_data: Iterable of row dicts with the keys ``name``,
            ``start_time``, ``setup_time``, ``run_time``, ``category``
            and ``description``.
        local_timezone: Name of the pytz timezone the start time (parsed
            as UTC) is converted to before formatting.

    Yields:
        Newline-terminated CSV lines.

    Raises:
        KeyError: If a row other than the ``Finale!`` row lacks one of
            the keys listed above.
        ValueError: If a timestamp or duration cell cannot be parsed.
    """
    def quote(s):
        # CSV escapes an embedded double quote by doubling it (RFC 4180);
        # a backslash escape is not understood by CSV readers.
        return '"{}"'.format(s.replace('"', '""'))

    def fmt_time(dt):
        return dt.strftime(CSV_TIME_FMT)

    def fmt_date(dt):
        return dt.strftime(CSV_DATE_FMT)

    def parse_timedelta(s):
        # Fields are matched right to left (seconds, minutes, hours), so
        # a value with fewer than three parts still lines up correctly.
        parts = [int(part) for part in s.split(':')[::-1]]
        return datetime.timedelta(
            **dict(zip(['seconds', 'minutes', 'hours'], parts)))

    def parse_timestamp(ts):
        # The parsed value is naive; mark it as UTC, then convert.
        utc_dt = pytz.utc.localize(
            datetime.datetime.strptime(ts, TIMESTAMP_FMT))
        return utc_dt.astimezone(pytz.timezone(local_timezone))

    def csv_fmt_data_row(row):
        start_dt = parse_timestamp(row['start_time'])
        # The event ends after both the setup and the run durations.
        end_dt = (start_dt
                  + parse_timedelta(row['setup_time'])
                  + parse_timedelta(row['run_time']))
        fields = [
            row['name'],
            fmt_date(start_dt),
            fmt_time(start_dt),
            fmt_date(end_dt),
            fmt_time(end_dt),
            'False',
            ' '.join([row['category'], row['description']]),
        ]
        return ','.join(quote(field) for field in fields) + '\n'

    yield ','.join(CSV_KEYS) + '\n'
    for row in table_data:
        # skip the last row because it doesn't have as many cells as the
        # rest (easier than an actual fix)
        if row['name'] == 'Finale!':
            continue
        yield csv_fmt_data_row(row)
if __name__ == '__main__':
    # Development entry point only: debug=True turns on Flask's debug mode
    # (interactive debugger and reloader), which must never be exposed on
    # a publicly reachable host.
    app.run(debug=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
beautifulsoup4==4.4.1
Flask==0.10.1
itsdangerous==0.24
Jinja2==2.8
MarkupSafe==0.23
pytz==2015.7
requests==2.9.1
Werkzeug==0.11.3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment