EteimZ · November 11, 2023 17:44
diff --git a/dom.py b/dom.py
 from html.parser import HTMLParser

 class SimpleHTMLParser(HTMLParser):
    """Build a nested dict tree from HTML markup fed to the parser.

    Every element is a dict with the keys 'tag', 'attributes' and 'content';
    'content' holds child elements and stripped text in document order.

    ``dom_tree`` doubles as the result holder and the open-element stack:
    index 0 always keeps the root element (so it survives its own end tag),
    and the entries after it are the currently open elements, innermost last.
    Markup that follows the root's end tag is attached to the root.
    """

    # Elements that never take an end tag; pushing them on the stack would
    # make the following siblings and text end up nested inside them.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'source', 'track', 'wbr',
    ))

    def __init__(self):
        super().__init__()
        self.dom_tree = []

    def handle_starttag(self, tag, attrs):
        """Create an element for *tag* and attach it to the current parent."""
        element = {'tag': tag, 'attributes': dict(attrs), 'content': []}
        if self.dom_tree:
            self.dom_tree[-1]['content'].append(element)
        else:
            # First element seen: keep it at index 0 as the permanent root.
            self.dom_tree.append(element)
        if tag not in self._VOID_ELEMENTS:
            self.dom_tree.append(element)

    def handle_endtag(self, tag):
        """Close the innermost open element named *tag*.

        Elements left open inside it are closed as well. An end tag that
        matches no open element is ignored.
        """
        # Index 0 is the root anchor, not an open element, so stop before it.
        for depth in range(len(self.dom_tree) - 1, 0, -1):
            if self.dom_tree[depth]['tag'] == tag:
                del self.dom_tree[depth:]
                return

    def handle_data(self, data):
        """Append stripped, non-empty text to the current parent element.

        Text that appears before any element is dropped because there is
        nothing to attach it to.
        """
        text = data.strip()
        if text and self.dom_tree:
            self.dom_tree[-1]['content'].append(text)

 def parse_html(html_content):
    """Parse an HTML string and return its root element.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The first element found in the markup, as built by
        SimpleHTMLParser (a dict with 'tag', 'attributes' and 'content').

    Raises:
        ValueError: If the markup contains no element at all.
    """
    parser = SimpleHTMLParser()
    parser.feed(html_content)
    # feed() may hold back trailing input (for example text ending in a
    # partial character reference); close() forces it to be processed.
    parser.close()
    if not parser.dom_tree:
        raise ValueError("no HTML element found in input")
    return parser.dom_tree[0]

 def print_dom_tree(element, indent=0):
    """Print an element tree to stdout, one node per line.

    Args:
        element: Either an element dict (keys 'tag', 'attributes',
            'content') or a text string. Any other value prints nothing.
        indent: Number of spaces to put in front of this node; children
            are printed two spaces deeper.
    """
    if isinstance(element, str):
        print(' ' * indent + element)
        return
    if not isinstance(element, dict):
        return

    pad = ' ' * indent
    tag = element['tag']
    print(pad + f"<{tag} {element['attributes']}>")
    for child in element['content']:
        print_dom_tree(child, indent + 2)
    print(pad + f"</{tag}>")

 # Sample document used to exercise parse_html and print_dom_tree.
 html_content = """
 <html>
    <head>
        <title>Test</title>
    </head>
    <body>
        <h1>Parse me!</h1>
    </body>
 </html>
 """

 # Parse the sample and print the resulting element tree to stdout.
 dom_tree = parse_html(html_content)
 print_dom_tree(dom_tree)
	from html.parser import HTMLParser

	class SimpleHTMLParser(HTMLParser):
	    """Build a nested dict tree from HTML markup fed to the parser.

	    Every element is a dict with the keys 'tag', 'attributes' and 'content';
	    'content' holds child elements and stripped text in document order.

	    ``dom_tree`` doubles as the result holder and the open-element stack:
	    index 0 always keeps the root element (so it survives its own end tag),
	    and the entries after it are the currently open elements, innermost last.
	    Markup that follows the root's end tag is attached to the root.
	    """

	    # Elements that never take an end tag; pushing them on the stack would
	    # make the following siblings and text end up nested inside them.
	    _VOID_ELEMENTS = frozenset((
	        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
	        'link', 'meta', 'source', 'track', 'wbr',
	    ))

	    def __init__(self):
	        super().__init__()
	        self.dom_tree = []

	    def handle_starttag(self, tag, attrs):
	        """Create an element for *tag* and attach it to the current parent."""
	        element = {'tag': tag, 'attributes': dict(attrs), 'content': []}
	        if self.dom_tree:
	            self.dom_tree[-1]['content'].append(element)
	        else:
	            # First element seen: keep it at index 0 as the permanent root.
	            self.dom_tree.append(element)
	        if tag not in self._VOID_ELEMENTS:
	            self.dom_tree.append(element)

	    def handle_endtag(self, tag):
	        """Close the innermost open element named *tag*.

	        Elements left open inside it are closed as well. An end tag that
	        matches no open element is ignored.
	        """
	        # Index 0 is the root anchor, not an open element, so stop before it.
	        for depth in range(len(self.dom_tree) - 1, 0, -1):
	            if self.dom_tree[depth]['tag'] == tag:
	                del self.dom_tree[depth:]
	                return

	    def handle_data(self, data):
	        """Append stripped, non-empty text to the current parent element.

	        Text that appears before any element is dropped because there is
	        nothing to attach it to.
	        """
	        text = data.strip()
	        if text and self.dom_tree:
	            self.dom_tree[-1]['content'].append(text)

	def parse_html(html_content):
	    """Parse an HTML string and return its root element.

	    Args:
	        html_content: HTML markup as a string.

	    Returns:
	        The first element found in the markup, as built by
	        SimpleHTMLParser (a dict with 'tag', 'attributes' and 'content').

	    Raises:
	        ValueError: If the markup contains no element at all.
	    """
	    parser = SimpleHTMLParser()
	    parser.feed(html_content)
	    # feed() may hold back trailing input (for example text ending in a
	    # partial character reference); close() forces it to be processed.
	    parser.close()
	    if not parser.dom_tree:
	        raise ValueError("no HTML element found in input")
	    return parser.dom_tree[0]

	def print_dom_tree(element, indent=0):
	    """Print an element tree to stdout, one node per line.

	    Args:
	        element: Either an element dict (keys 'tag', 'attributes',
	            'content') or a text string. Any other value prints nothing.
	        indent: Number of spaces to put in front of this node; children
	            are printed two spaces deeper.
	    """
	    if isinstance(element, dict):
	        pad = ' ' * indent
	        print(pad + f"<{element['tag']} {element['attributes']}>")
	        for content in element['content']:
	            print_dom_tree(content, indent + 2)
	        print(pad + f"</{element['tag']}>")
	    elif isinstance(element, str):
	        print(' ' * indent + element)

	# Sample document used to exercise parse_html and print_dom_tree.
	html_content = """
	<html>
	<head>
	<title>Test</title>
	</head>
	<body>
	<h1>Parse me!</h1>
	</body>
	</html>
	"""

	# Parse the sample and print the resulting element tree to stdout.
	dom_tree = parse_html(html_content)
	print_dom_tree(dom_tree)
No results found