dalelane · January 19, 2014 22:55
diff --git a/generateswimics.py b/generateswimics.py
 #
 # Create an ICS calendar with the sessions available at a DC Leisure Centre
 #     using the info found by scraping the timetable website
 # 
 #    http://dalelane.co.uk/blog/?p=3017 
 # 
 #    Dale Lane
 #       [email protected]
 #

 from bs4 import BeautifulSoup
 from dateutil import parser
 import urllib, urlparse
 import datetime, time
 import icalendar
 import os


 # 
 # CONSTANTS
 # 
 WEBSITEURL = "http://www.dcleisurecentres.co.uk/centres/fleming-park-leisure-centre/timetables/swim"
 WEBSITENAME = "Fleming Park"
 OUTPUT = "swimflemingpark.ics"


 #
 # get the date for the day with the provided name
 #  (assuming that it's either today or within a week)
 # 
 def get_next_weekday(now, dayname):
    # convert the day name to a number
    weekday = time.strptime(dayname, "%A").tm_wday
    days_ahead = weekday - now.weekday()
    if days_ahead < 0:
        days_ahead += 7
    return now + datetime.timedelta(days_ahead)

 #
 # download the schedule from the provided URL
 #   parse the contents, and return an array of items containing
 #   the sessions described on the page
 #
 def get_schedule(timetableurl, timetableday):
    # download the page 
    pagecontents = urllib.urlopen(timetableurl).read()
    # parse the page
    soup = BeautifulSoup(pagecontents)

    schedule = []

    # get the first timetable from the page (ignore the other timetables such 
    #  as the timetable for the Teaching Pool)
    timetables = soup.body.find_all("div", class_="timeTable", limit=1)
    for timetable in timetables:
        timetablerows = timetable.find_all("tr")        
        for timetablerow in timetablerows:
            timetableInfo = timetablerow.find_all("td")
            if len(timetableInfo) > 0:
                sessionTime = timetableInfo[0].string
                sessionType = timetableInfo[1].string

                sessionTimes = sessionTime.split(" - ")
                sessionStart = datetime.datetime.combine(timetableday, parser.parse(sessionTimes[0]).time())
                sessionFinish = datetime.datetime.combine(timetableday, parser.parse(sessionTimes[1]).time())

                schedule.append({ 
                    "type" : sessionType, 
                    "start" : sessionStart, 
                    "end" : sessionFinish
                })

    return schedule 

 #
 # download the page from the provided URL
 #   parse the contents, and find the navigation bar containing links 
 #   to the schedule for each day of the week
 #   return the links, with the dates each link is for
 #
 def get_days(basetimetableurl):
    now = datetime.datetime.now()

    pagecontents = urllib.urlopen(basetimetableurl).read()
    soup = BeautifulSoup(pagecontents)

    timetables = []

    weekNavMenus = soup.body.find_next("div", class_="ttNavSubDays")
    weekMenu = weekNavMenus.find_all("li")
    for day in weekMenu:
        link = day.find_all("a")
        if link:
            timetables.append({
                "url" : urlparse.urljoin(basetimetableurl, link[0].get('href')), 
                "day" : get_next_weekday(now, link[0].string)
            })

    return timetables


 #
 # 
 #


 # timestamp to give to the generated ICS file
 scriptruntime = datetime.datetime.now()

 # create the new icalendar
 cal = icalendar.Calendar()
 cal.add("prodid", "-//Swimming Timetable//" + WEBSITENAME + "//")
 cal.add("version", "2.0")

 # get the list of timetables to download and parse - one for each day
 timetables = get_days(WEBSITEURL)

 # for each timetable...
 for timetable in timetables:
    # download and parse the timetable and get the session info
    for timetableinfo in get_schedule(timetable['url'], timetable['day']):
        # create an event to represent the session 
        event = icalendar.Event()
        event.add("summary", timetableinfo['type'])
        event.add("dtstart", timetableinfo['start'])
        event.add("dtend", timetableinfo['end'])
        event.add("dtstamp", scriptruntime)
        # add the event to the calendar
        cal.add_component(event)

 # write the calendar to an ics file
 f = open(OUTPUT, 'wb')
 f.write(cal.to_ical())
 f.close()
	#
	# Create an ICS calendar with the sessions available at a DC Leisure Centre
	# using the info found by scraping the timetable website
	#
	# http://dalelane.co.uk/blog/?p=3017
	#
	# Dale Lane
	# [email protected]
	#

	from bs4 import BeautifulSoup
	from dateutil import parser
	import urllib, urlparse
	import datetime, time
	import icalendar
	import os


	#
	# CONSTANTS
	#
	WEBSITEURL = "http://www.dcleisurecentres.co.uk/centres/fleming-park-leisure-centre/timetables/swim"
	WEBSITENAME = "Fleming Park"
	OUTPUT = "swimflemingpark.ics"


	#
	# get the date for the day with the provided name
	# (assuming that it's either today or within a week)
	#
	def get_next_weekday(now, dayname):
	# convert the day name to a number
	weekday = time.strptime(dayname, "%A").tm_wday
	days_ahead = weekday - now.weekday()
	if days_ahead < 0:
	days_ahead += 7
	return now + datetime.timedelta(days_ahead)

	#
	# download the schedule from the provided URL
	# parse the contents, and return an array of items containing
	# the sessions described on the page
	#
	def get_schedule(timetableurl, timetableday):
	# download the page
	pagecontents = urllib.urlopen(timetableurl).read()
	# parse the page
	soup = BeautifulSoup(pagecontents)

	schedule = []

	# get the first timetable from the page (ignore the other timetables such
	# as the timetable for the Teaching Pool)
	timetables = soup.body.find_all("div", class_="timeTable", limit=1)
	for timetable in timetables:
	timetablerows = timetable.find_all("tr")
	for timetablerow in timetablerows:
	timetableInfo = timetablerow.find_all("td")
	if len(timetableInfo) > 0:
	sessionTime = timetableInfo[0].string
	sessionType = timetableInfo[1].string

	sessionTimes = sessionTime.split(" - ")
	sessionStart = datetime.datetime.combine(timetableday, parser.parse(sessionTimes[0]).time())
	sessionFinish = datetime.datetime.combine(timetableday, parser.parse(sessionTimes[1]).time())

	schedule.append({
	"type" : sessionType,
	"start" : sessionStart,
	"end" : sessionFinish
	})

	return schedule

	#
	# download the page from the provided URL
	# parse the contents, and find the navigation bar containing links
	# to the schedule for each day of the week
	# return the links, with the dates each link is for
	#
	def get_days(basetimetableurl):
	now = datetime.datetime.now()

	pagecontents = urllib.urlopen(basetimetableurl).read()
	soup = BeautifulSoup(pagecontents)

	timetables = []

	weekNavMenus = soup.body.find_next("div", class_="ttNavSubDays")
	weekMenu = weekNavMenus.find_all("li")
	for day in weekMenu:
	link = day.find_all("a")
	if link:
	timetables.append({
	"url" : urlparse.urljoin(basetimetableurl, link[0].get('href')),
	"day" : get_next_weekday(now, link[0].string)
	})

	return timetables


	#
	#
	#


	# timestamp to give to the generated ICS file
	scriptruntime = datetime.datetime.now()

	# create the new icalendar
	cal = icalendar.Calendar()
	cal.add("prodid", "-//Swimming Timetable//" + WEBSITENAME + "//")
	cal.add("version", "2.0")

	# get the list of timetables to download and parse - one for each day
	timetables = get_days(WEBSITEURL)

	# for each timetable...
	for timetable in timetables:
	# download and parse the timetable and get the session info
	for timetableinfo in get_schedule(timetable['url'], timetable['day']):
	# create an event to represent the session
	event = icalendar.Event()
	event.add("summary", timetableinfo['type'])
	event.add("dtstart", timetableinfo['start'])
	event.add("dtend", timetableinfo['end'])
	event.add("dtstamp", scriptruntime)
	# add the event to the calendar
	cal.add_component(event)

	# write the calendar to an ics file
	f = open(OUTPUT, 'wb')
	f.write(cal.to_ical())
	f.close()