Created
March 26, 2019 00:09
-
-
Save waylan/1f367038e706511388c488bdbf807c82 to your computer and use it in GitHub Desktop.
A simple test HTMLParser subclass for seeing how Python's HTMLParser works.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 2 ships the parser as the top-level ``HTMLParser`` module, while
# Python 3 provides it as ``html.parser``. Either way the module is bound to
# the name ``parser`` so the rest of the file can use ``parser.HTMLParser``.
try:
    import HTMLParser as parser
except ImportError:
    from html import parser
class TestParser(parser.HTMLParser):
    """HTMLParser subclass that prints every parse event it receives.

    Each handler writes one line to stdout made of a short label naming the
    handler that fired, followed by the payload the base parser passed in.
    Feeding markup to an instance therefore shows which handler is invoked
    for which piece of input.
    """

    def handle_starttag(self, tag, attrs):
        """Print the name of a start tag; ``attrs`` is accepted but unused."""
        print("STAG:", tag)

    def handle_endtag(self, tag):
        """Print the name of an end tag."""
        print("ETAG:", tag)

    def handle_data(self, data):
        """Print a run of text data."""
        print("DATA:", data)

    def handle_comment(self, data):
        """Print the contents of a comment."""
        print("CMNT:", data)

    def handle_decl(self, data):
        """Print the contents of a declaration."""
        print("DECL:", data)

    def handle_pi(self, data):
        """Print the contents of a processing instruction."""
        print('PI :', data)

    def unknown_decl(self, data):
        """Print the contents of a declaration the parser did not recognise."""
        print("UNDC:", data)
if __name__ == '__main__':
    # Sample input: a CDATA marked section inside a <div>, used to observe
    # which handler the parser reports it through.
    t = '<div><![CDATA[ foo ]]></div>'
    # Named ``test_parser`` rather than ``parser`` so the instance does not
    # rebind the module imported under that name at the top of the file.
    test_parser = TestParser()
    test_parser.feed(t)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment