Skip to content

Instantly share code, notes, and snippets.

@aheadley
Last active January 7, 2016 20:07
Show Gist options
  • Save aheadley/d613e645f3715c64be5b to your computer and use it in GitHub Desktop.
Save aheadley/d613e645f3715c64be5b to your computer and use it in GitHub Desktop.
Tiny webapp to convert the AGDQ 2016 schedule page to CSV for importing into Google Calendar
#!/bin/env python2
# -*- encoding: utf-8 -*-
import csv
import datetime
import pytz
import flask
import bs4
import requests
# Page that carries the schedule table to scrape.
AGDQ_SCHEDULE_URL = 'https://gamesdonequick.com/schedule'

# strptime layout of the timestamps read from the schedule rows.
TIMESTAMP_FMT = '%Y-%m-%dT%H:%M:%SZ'

# strftime layouts used for the date and time columns of the CSV output.
CSV_DATE_FMT = '%m/%d/%Y'
CSV_TIME_FMT = '%I:%M %p'

# Header row of the CSV output, in column order.
CSV_KEYS = [
    'Subject',
    'Start Date',
    'Start Time',
    'End Date',
    'End Time',
    'All Day Event',
    'Description',
]
# Flask application object; the route handler below is registered on it.
app = flask.Flask(__name__)
@app.route('/')
def export_schedule_csv():
    """Respond to ``/`` with the AGDQ schedule rendered as CSV.

    Fetches the schedule page, parses its table into row dicts, and
    returns a ``text/csv`` response whose body is the generated CSV lines.
    """
    page_html = get_stream_url(AGDQ_SCHEDULE_URL)
    csv_lines = generate_schedule_csv(parse_schedule_page(page_html))
    return flask.Response(csv_lines, mimetype='text/csv')
def get_stream_url(url, timeout=30):
    """Fetch *url* with HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up, so a
            stalled upstream cannot hang the request handler indefinitely.

    Returns:
        The undecoded response body as a byte string.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a 4xx/5xx status code.
    """
    # The whole body is read right away, so a streamed request buys nothing.
    resp = requests.get(url, timeout=timeout)
    # Fail loudly instead of handing an error page to the HTML parser.
    resp.raise_for_status()
    return resp.content
def parse_schedule_page(page_content):
    """Turn the schedule page HTML into a list of per-row dicts.

    Column names are taken from the ``td`` cells of the first header row
    of the element that is the parent of ``id="runTable"``: lower-cased,
    with spaces replaced by underscores. Every direct ``tr`` child of the
    table body becomes one dict mapping those names to the UTF-8 encoded
    text of its cells. Cells are paired with names positionally, so a row
    with fewer cells than the header simply ends up with fewer keys.
    """
    def direct_cells(row):
        # Only the row's own <td> children, not cells nested deeper.
        return row.find_all(name='td', recursive=False)

    document = bs4.BeautifulSoup(page_content, 'html.parser')
    table = document.find(id='runTable').parent

    column_names = []
    for header_cell in direct_cells(table.thead.tr):
        column_names.append(header_cell.text.lower().replace(' ', '_'))

    rows = []
    for body_row in table.tbody.find_all(name='tr', recursive=False):
        values = [cell.text.encode('utf-8') for cell in direct_cells(body_row)]
        rows.append(dict(zip(column_names, values)))
    return rows
def _quote_csv_field(value):
    """Wrap *value* in double quotes, doubling embedded quotes (RFC 4180)."""
    # Backslash-escaping is not understood by CSV readers; a literal quote
    # inside a quoted field must be written as two quotes.
    return '"{}"'.format(value.replace('"', '""'))


def _parse_duration(text):
    """Convert ``[[H:]M:]S`` text into a ``datetime.timedelta``."""
    # Reading right-to-left lets shorter forms such as "M:S" line up with
    # the correct units.
    units = ('seconds', 'minutes', 'hours')
    parts = [int(piece) for piece in text.split(':')[::-1]]
    return datetime.timedelta(**dict(zip(units, parts)))


def generate_schedule_csv(table_data, local_timezone='America/Detroit'):
    """Yield the schedule as CSV text, one line per iteration.

    The first item is the unquoted header built from ``CSV_KEYS``; each
    following item is one event row with every field double-quoted.

    Args:
        table_data: Iterable of dicts with the string keys ``name``,
            ``start_time`` (a UTC timestamp in ``TIMESTAMP_FMT``),
            ``setup_time`` and ``run_time`` (``H:M:S`` durations),
            ``category`` and ``description``.
        local_timezone: Name of the pytz timezone the start and end
            times are converted to before formatting.

    Yields:
        Newline-terminated CSV lines.

    Raises:
        pytz.exceptions.UnknownTimeZoneError: If *local_timezone* is not
            a known zone name.
        KeyError: If a row other than the finale lacks a required key.
        ValueError: If a timestamp or duration cannot be parsed.
    """
    # Resolve the zone once instead of once per row.
    target_tz = pytz.timezone(local_timezone)

    def to_local(timestamp):
        naive_utc = datetime.datetime.strptime(timestamp, TIMESTAMP_FMT)
        return pytz.utc.localize(naive_utc).astimezone(target_tz)

    def format_row(row):
        start_dt = to_local(row['start_time'])
        # Arithmetic on a pytz-aware datetime keeps the old UTC offset;
        # normalize() corrects it if the event crosses a DST transition.
        end_dt = target_tz.normalize(
            start_dt
            + _parse_duration(row['setup_time'])
            + _parse_duration(row['run_time'])
        )
        fields = [
            row['name'],
            start_dt.strftime(CSV_DATE_FMT),
            start_dt.strftime(CSV_TIME_FMT),
            end_dt.strftime(CSV_DATE_FMT),
            end_dt.strftime(CSV_TIME_FMT),
            'False',
            ' '.join([row['category'], row['description']]),
        ]
        return ','.join(_quote_csv_field(field) for field in fields) + '\n'

    yield ','.join(CSV_KEYS) + '\n'
    for row in table_data:
        # skip the last row because it doesn't have as many cells as the rest
        # (easier than an actual fix)
        if row['name'] != 'Finale!':
            yield format_row(row)
def _run_dev_server():
    """Start the Flask app's built-in server with debug mode enabled."""
    app.run(debug=True)


if __name__ == '__main__':
    _run_dev_server()
beautifulsoup4==4.4.1
Flask==0.10.1
itsdangerous==0.24
Jinja2==2.8
MarkupSafe==0.23
pytz==2015.7
requests==2.9.1
Werkzeug==0.11.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment