Skip to content

Instantly share code, notes, and snippets.

@PM2Ring
Created June 20, 2017 14:00
Show Gist options
  • Save PM2Ring/949539473ae56eb7bfb7d709512ea896 to your computer and use it in GitHub Desktop.
Save PM2Ring/949539473ae56eb7bfb7d709512ea896 to your computer and use it in GitHub Desktop.
Show the structure of an HTML file (or part thereof) in the shell
#!/usr/bin/env python3
''' Show HTML file structure
Written by PM 2Ring 2017.06.19
'''
import sys
from html.parser import HTMLParser
# HTML tags that are treated as having no end tag: DumpHTML does not
# increase the nesting depth after their START line and ignores any
# matching end tag.
# This covers the HTML void elements (plus the obsolete `param`), so that
# a tag which can never be closed does not permanently shift the indentation.
# 'p' is not a void element, but it is kept here as in the original list;
# its end tag is optional in HTML and is frequently omitted.
unpaired = {
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'p', 'param', 'source', 'track', 'wbr',
}
def bold(s):
    ''' Wrap `s` in ANSI escape sequences to make it bold

    `s` may be any object; it is rendered the way `str()` would render it.
    Returns a string made of the SGR "bold" sequence (ESC[1m), the text,
    and the SGR "reset" sequence (ESC[0m).
    '''
    # str.format is used rather than the % operator because % treats a
    # tuple argument as a list of format arguments: a 1-tuple would be
    # silently unpacked and longer tuples would raise TypeError.
    return '\x1b[1m{}\x1b[0m'.format(s)
# Record-type names shown in front of every dumped item, and their bold
# renderings in the same order.
labels = ('START', 'END', 'DATA', 'COMMENT')
startlbl, endlbl, datalbl, commentlbl = (bold(text) for text in labels)
class DumpHTML(HTMLParser):
    ''' Parser that prints every tag, text run and comment it encounters,
        indenting each output line by the current nesting depth
    '''

    def __init__(self):
        super().__init__()
        # Current nesting level; each level adds one space of indentation.
        self.depth = 0

    @property
    def line_head(self):
        ''' Output line prefix: the source line number zero-padded to four
            digits, followed by one space per nesting level
        '''
        lineno = self.getpos()[0]
        indent = ' ' * self.depth
        return str(lineno).zfill(4) + indent

    def handle_starttag(self, tag, attrs):
        ''' Print a start tag with its attributes as a dict, then descend
            one level unless the tag is listed in `unpaired`
        '''
        attributes = dict(attrs)
        print(self.line_head, startlbl, tag, attributes)
        if tag in unpaired:
            return
        self.depth += 1

    def handle_endtag(self, tag):
        ''' Climb one level and print the end tag; end tags of `unpaired`
            tags are ignored. Raises StopIteration once the depth drops
            below zero.
        '''
        if tag in unpaired:
            return
        self.depth -= 1
        print(self.line_head, endlbl, tag)
        if self.depth < 0:
            raise StopIteration

    def handle_data(self, data):
        ''' Print the repr of a text run, skipping whitespace-only runs '''
        if not data.strip():
            return
        print(self.line_head, datalbl, repr(data))

    def handle_comment(self, comment):
        ''' Print the repr of a comment's text '''
        text = repr(comment)
        print(self.line_head, commentlbl, text)
def main():
    ''' Dump the structure of the HTML file named on the command line

    Expects exactly one command-line argument, the file name. With any
    other argument count a usage message is written to stderr and the
    process exits with status 2.

    If the parser raises StopIteration (nesting depth dropped below the
    starting level), a warning is printed and the dump stops there.
    '''
    if len(sys.argv) != 2:
        # A usage error is a failure: report it on stderr and exit with a
        # non-zero status so calling scripts can detect it.
        print('HTML file dumper\nUsage:\n%s filename' % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)

    # errors='replace' keeps the dump going when the file contains bytes
    # that are invalid in the default encoding, instead of aborting with
    # UnicodeDecodeError before anything is shown.
    with open(sys.argv[1], errors='replace') as f:
        data = f.read()

    parser = DumpHTML()
    try:
        parser.feed(data)
        parser.close()
    except StopIteration:
        msg = 'Data found at a higher nesting level than the starting data; skipping.'
        print(bold('Warning:'), msg)
# Run the dumper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment