Last active
August 29, 2015 14:05
-
-
Save tmeissner/e6b47776d562b22ab629 to your computer and use it in GitHub Desktop.
Extract menu data from html file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import xml.dom.minidom as DOM
class SpeisePlan:
    '''Parse a weekly meal plan out of an HTML file.

    The meals are collected per weekday in the ``wochenplan`` dictionary,
    which maps each German weekday name listed in ``days`` to a list of
    meal strings.
    '''

    def __init__(self, fileName):
        '''Create an empty ``wochenplan`` and load the given file.

        fileName -- file name (or file object) that is passed on to parse().
        '''
        self.days = ("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag")
        self.wochenplan = {day: [] for day in self.days}
        self.parse(fileName)

    def parse(self, fileName):
        '''Load the document given by fileName into ``self.domTree``.

        xml.dom.minidom is an XML parser, so the file has to be well-formed
        XML (e.g. XHTML); DOM.parse() raises for anything else.
        '''
        self.domTree = DOM.parse(fileName)

    def findDayNodes(self):
        '''Collect the meal rows of the document into ``self.dayNodes``.

        All <div> elements with the class 'zeile_tage' are taken in document
        order and the ones at positions 3 to 6 (zero-based, inclusive) are
        kept. Fewer than seven such rows simply yield a shorter list.
        '''
        # NOTE(review): the indentation of the original was lost; the row
        # counter is assumed to advance once per 'zeile_tage' div -- confirm.
        rows = [
            div for div in self.domTree.getElementsByTagName('div')
            if div.getAttribute('class') == 'zeile_tage'
        ]
        # presumably rows 0-2 are headers and 3-6 the meal lines; verify
        # against the actual page layout.
        self.dayNodes = rows[3:7]

    def genPlan(self):
        '''Fill ``wochenplan`` with the meals of the whole week.

        For every row found by findDayNodes() the direct child <div>
        elements with the class 'spalte_tag' are matched positionally to the
        weekdays in ``days``. The text of each cell, with double quotes
        removed, is appended to the list of its weekday.
        '''
        self.findDayNodes()
        # Start from empty lists so that calling genPlan() again does not
        # append every meal a second time.
        for day in self.days:
            self.wochenplan[day] = []
        for row in self.dayNodes:
            # The nodeName test comes first because text nodes between the
            # columns have no getAttribute() method.
            # NOTE(review): the day index is assumed to advance once per
            # 'spalte_tag' column (indentation was lost) -- confirm.
            columns = [
                node for node in row.childNodes
                if node.nodeName == 'div'
                and node.getAttribute('class') == 'spalte_tag'
            ]
            # zip() stops at the shorter sequence, so surplus columns are
            # ignored instead of raising an IndexError.
            for day, column in zip(self.days, columns):
                text = getattr(column.firstChild, 'data', None)
                if text is None:
                    # Empty cell, or the cell starts with a nested element:
                    # nothing to record, but the column still counts as a
                    # day so the following columns stay aligned.
                    continue
                self.wochenplan[day].append(text.replace('"', ''))

    def printPlan(self):
        '''Print the ``wochenplan`` dictionary to standard output.'''
        print(self.wochenplan)
def main():
    '''Load 'speiseplan.html', build the weekly meal plan and print it.'''
    speiseplan = SpeisePlan('speiseplan.html')
    speiseplan.genPlan()
    speiseplan.printPlan()
# Run the extraction only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment