Skip to content

Instantly share code, notes, and snippets.

@maxrothman
Created October 14, 2014 17:03
Show Gist options
  • Save maxrothman/d15dfeb05a0a434b1be3 to your computer and use it in GitHub Desktop.
Save maxrothman/d15dfeb05a0a434b1be3 to your computer and use it in GitHub Desktop.
Parsing the Arisia HTML schedule for conversion into your favorite calendar format
#!/usr/bin/python
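# Converts the Arisia HTML schedule into a list of dicts with the following format:
# {name: NAME, start: YYYY-MM-DDTHH:MM:SS.000-05:00, end: (same format as start), location: LOCATION, body: TEXT}
# NOTE: name was meant to include the event type, but parse() currently emits the title only.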
import re
from datetime import datetime, timedelta
import dateutil.parser
from sample import main
# Path of the saved schedule page that is parsed when this module runs as a
# script.
thefile = 'Schedule'

# Accumulates the event dicts produced by parse().
events = list()

# strftime template for event timestamps. Only the date, hour and minute come
# from the datetime; the ":00.000-05:00" tail (seconds, milliseconds and UTC
# offset) is literal text.
RFC3339 = '%Y-%m-%dT%H:%M' ':00.000-05:00'
def _as_text(raw):
    """Return *raw* as the interpreter's native ``str`` type.

    On Python 2 ``bytes`` is ``str``, so the value passes through untouched;
    on Python 3 the UTF-8 bytes read from the file are decoded.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "replace")


def _parse_duration(raw):
    """Turn a duration field such as ``1hr 15min`` into a ``timedelta``.

    Either part may be absent and each may have any number of digits, so
    ``45min`` and ``10hr`` are understood as well. Text holding neither part
    yields a zero-length duration.
    """
    # Every piece of the pattern is optional, so the match never fails.
    found = re.match(b" *(?:([0-9]+)hr)? *(?:([0-9]+)min)?", raw)
    hours = int(found.group(1)) if found.group(1) else 0
    minutes = int(found.group(2)) if found.group(2) else 0
    return timedelta(hours=hours, minutes=minutes)


def parse(thefile):
    """Extract events from the Arisia HTML schedule stored at *thefile*.

    The file is scanned line by line:

    * a ``<hr /><h3>...</h3>`` heading sets the start time of the events
      that follow it;
    * a ``<dl><dt>`` title line supplies the name, duration and location;
    * the next ``<dd>...</dd>`` line supplies the description and completes
      the event. Lines containing ``<a href=`` are ignored.

    Each completed event is appended to the module-level ``events`` list as a
    dict with the keys ``name``, ``start``, ``end``, ``location`` and
    ``body``; ``start`` and ``end`` are formatted with ``RFC3339`` and ``end``
    is the start time plus the parsed duration.

    A description that appears before any heading or title has been seen is
    skipped, since no event can be built from it.

    Returns the module-level ``events`` list.
    """
    # The file is read as bytes and matched with bytes patterns so that the
    # UTF-8 encoded em dash separators match on both Python 2 and Python 3.
    heading_re = re.compile(b"<hr /><h3>(.*?)</h3>")
    title_re = re.compile(
        b"<dl><dt><b>(.*?)</b>"
        b" <i>\xe2\x80\x94 (.*?)</i>"
        b" <i>\xe2\x80\x94 (.*?)</i>"
        b" <i>\xe2\x80\x94 (.*?)</i></dt>"
    )
    body_re = re.compile(b"<dd>(.*?)</dd>")

    start = None  # start time taken from the most recent heading
    title = None  # (name, duration, location) from the most recent title line

    with open(thefile, "rb") as f:
        for line in f:
            heading = heading_re.search(line)
            if heading:
                start = dateutil.parser.parse(_as_text(heading.group(1)))
                continue

            header = title_re.match(line)
            if header:
                # The second field is the event type; it is captured by the
                # pattern but is not part of the emitted event.
                name, _kind, duration, location = header.groups()
                title = (
                    _as_text(name),
                    _parse_duration(duration),
                    _as_text(location),
                )
                continue

            if b"<a href=" in line:
                continue

            body = body_re.match(line)
            if body is None:
                continue
            if start is None or title is None:
                # Description with no preceding heading/title: nothing to
                # attach it to.
                continue

            name, duration, location = title
            events.append({
                'name': name,
                'start': start.strftime(RFC3339),
                'end': (start + duration).strftime(RFC3339),
                'location': location,
                'body': _as_text(body.group(1)),
            })
    return events
# Script entry point: parse the default schedule file. The returned list is
# not used here; the parsed events stay available in the module-level
# ``events`` list.
if __name__ == "__main__":
    parse(thefile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment