RobinDavid · February 24, 2014 20:48
diff --git a/html_parser.py b/html_parser.py
 from html.parser import HTMLParser			#Import the parser

 class HeadingParser(HTMLParser):
     """HTMLParser subclass that prints the text found inside <h1> elements.

     Each opening <h1> tag prints "Found a Heading 1"; every piece of text
     reported while that heading is still open is printed as well.
     """

     # True between an opening <h1> tag and its matching closing tag.
     inHeading = False

     def handle_starttag(self, tag, attrs):
         """Announce an opening <h1> and start echoing text."""
         if tag != "h1":
             return
         self.inHeading = True
         print("Found a Heading 1")

     def handle_data(self, data):
         """Print the text, but only while an <h1> is open."""
         if not self.inHeading:
             return
         print(data)

     def handle_endtag(self, tag):
         """Stop echoing text once the <h1> has been closed."""
         if tag != "h1":
             return
         self.inHeading = False

 hParser = HeadingParser()  # Parser instance that reports <h1> headings
 # Read the whole document up front. The context manager closes the file
 # even if read() raises, which the manual open()/close() pair did not.
 with open("file.html", "r") as file:
     html = file.read()
 hParser.feed(html)  # Parse the markup; headings are printed as they are found
 # Flush text still buffered after the last tag (e.g. an <h1> that is never
 # closed); without close() that trailing text would never be reported.
 hParser.close()
	from html.parser import HTMLParser #Import the parser

	class HeadingParser(HTMLParser):
		"""HTMLParser subclass that prints the text found inside <h1> elements.

		Each opening <h1> tag prints "Found a Heading 1"; every piece of text
		reported while that heading is still open is printed as well.
		"""

		# True between an opening <h1> tag and its matching closing tag.
		inHeading = False

		def handle_starttag(self, tag, attrs):
			"""Handle an opening tag: announce <h1> and start echoing text."""
			if tag == "h1":
				self.inHeading = True
				print("Found a Heading 1")

		def handle_data(self, data):
			"""Handle text content: print it only while an <h1> is open."""
			if self.inHeading:
				print(data)

		def handle_endtag(self, tag):
			"""Handle a closing tag: stop echoing text after </h1>."""
			if tag == "h1":
				self.inHeading = False

	hParser = HeadingParser()  # Parser instance that reports <h1> headings
	# Read the whole document up front. The context manager closes the file
	# even if read() raises, which the manual open()/close() pair did not.
	with open("file.html", "r") as file:
		html = file.read()
	hParser.feed(html)  # Parse the markup; headings are printed as they are found
	# Flush text still buffered after the last tag (e.g. an <h1> that is never
	# closed); without close() that trailing text would never be reported.
	hParser.close()