Skip to content

Instantly share code, notes, and snippets.

@sahib
Created April 16, 2012 21:37
Show Gist options
  • Save sahib/2401733 to your computer and use it in GitHub Desktop.
Save sahib/2401733 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import re, urllib.request, sys
from html.parser import HTMLParser
from urllib.error import HTTPError,URLError
class TableHTMLParser(HTMLParser):
    """Collect timetable rows from ``<td class="data">`` cells, grouped by weekday.

    After :meth:`feed`, ``plandict`` maps every weekday name that had at
    least one row to a list of tuples. Each tuple holds the cleaned cell
    texts of one row; the last element is the cell that matched the room
    pattern and thereby closed the row.
    """

    # Cell texts that open a new weekday section.
    _WEEKDAYS = ("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag")
    # A cell containing this pattern is treated as the room and ends the row.
    _ROOM_PATTERN = re.compile(r'F[A-Z][0-9]*')

    def __init__(self):
        super().__init__()
        self.__interest = False      # True while inside a <td class="data"> cell
        self.__lastday = "Montag"    # weekday the pending rows belong to
        self.__partlist = []         # cell texts of the row being assembled
        self.__record = []           # completed rows not yet stored in plandict
        self.plandict = {}           # weekday name -> list of row tuples

    def __add_record(self, day):
        """Store the pending rows under the current weekday, then switch to *day*."""
        if self.__record:
            # Accumulate instead of assigning: the same weekday can be flushed
            # more than once (feed() called in several chunks, or a weekday
            # header repeated in the page) and earlier rows must not be lost.
            self.plandict.setdefault(self.__lastday, []).extend(self.__record)
            self.__record = []
        self.__lastday = day

    def handle_starttag(self, tag, attrs):
        """Start capturing text when a ``<td class="data">`` cell opens."""
        if tag == 'td' and ('class', 'data') in attrs:
            self.__interest = True

    def handle_endtag(self, tag):
        """Stop capturing text on any closing tag, not only ``</td>``."""
        self.__interest = False

    def handle_data(self, data):
        """Route the text of a data cell to the weekday or row bookkeeping."""
        if not self.__interest:
            return
        # Every hyphen is removed (also those inside words), so cells that
        # consist only of dashes end up empty and are skipped below.
        data = data.replace('-', '').strip()
        if data in self._WEEKDAYS:
            self.__add_record(data)
        elif data:
            self.__partlist.append(data)
            if self._ROOM_PATTERN.search(data):
                # The room cell is the last one of a row: close the row.
                self.__record.append(tuple(self.__partlist))
                self.__partlist = []

    def feed(self, input_data):
        """Parse *input_data* and flush the rows collected for the last weekday.

        May be called repeatedly with consecutive chunks of the document;
        rows of the same weekday are accumulated across calls.
        """
        super().feed(input_data)
        self.__add_record(self.__lastday)

    def print_table(self):
        """Print every weekday followed by one formatted line per row.

        Columns are taken by position: first and second cell form the time
        span, the third is the name, the third-from-last the lecturer and
        the last the room. Rows with fewer than three cells cannot be laid
        out that way and are printed as their raw cells instead.
        """
        for day, rows in self.plandict.items():
            print(day)
            for entry in rows:
                if len(entry) < 3:
                    # Avoid an IndexError on incomplete rows while still
                    # showing what was captured.
                    print(' '.join(entry))
                    continue
                print('{time:18} {room:15} {name:32} {prof}'.format(
                    time=(entry[0] + " bis " + entry[1]),
                    room=entry[-1],
                    name=entry[2],
                    prof=entry[-3]))
class Download:
    """Fetch the timetable page and keep its decoded text in ``data``.

    The keyword arguments fill the ``{st}`` and ``{fs}`` placeholders of the
    request URL, which is stored in ``url``. If the request fails, a message
    is printed and the process exits: status -1 for an HTTP error, -2 for
    any other URL error.
    """

    def __init__(self, **param):
        self.url = "http://www.hof-university.de/index.php?id=515&st={st}&fs={fs}&jahr=2012&semester=SS".format(**param)
        try:
            # The context manager closes the response even if read() or the
            # UTF-8 decoding fails.
            with urllib.request.urlopen(self.url) as handle:
                self.data = str(handle.read(), 'UTF-8')
        except HTTPError as e:
            # HTTPError is a subclass of URLError, so it must be caught first.
            print("Unable to load URL:", e, self.url)
            sys.exit(-1)
        except URLError as e:
            print("Invalid URL:", e, self.url)
            sys.exit(-2)
if __name__ == '__main__':
    # Two positional arguments are required; otherwise show the usage line.
    arguments = sys.argv
    if len(arguments) <= 2:
        print('{} <Studiengang> <Fachsemester>'.format(arguments[0]))
        sys.exit(-3)

    parser = TableHTMLParser()
    # Download the page for the requested pair, then parse and print it.
    page = Download(st=arguments[1], fs=arguments[2])
    parser.feed(page.data)
    parser.print_table()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment