Skip to content

Instantly share code, notes, and snippets.

@xim
Last active May 7, 2019 13:04
Show Gist options
  • Save xim/59acea519e7c0443de3443912031309f to your computer and use it in GitHub Desktop.
Save xim/59acea519e7c0443de3443912031309f to your computer and use it in GitHub Desktop.
Urlview, done better -- with colors, context and unescaped links.
#!/usr/bin/env python3
import html.parser
import math
import os
import shutil
import subprocess
import sys
def pager():
    """Spawn ``less -R`` and return the process handle.

    The child's stdin is a text-mode pipe, so callers write ``str`` to
    ``proc.stdin`` and must close it (then wait) when they are done.
    """
    command = ['less', '-R']
    return subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        universal_newlines=True,
    )
def split_at_word_boundry(data):
    """Split ``data`` in two at the space closest to its middle.

    Returns a ``(head, tail)`` tuple. The space that was chosen as the split
    point is dropped. On a tie the space before the middle wins. When no
    suitable space exists, one side of the result is the empty string.
    """
    size = len(data)
    centre = size // 2

    # Distance from the centre back to the nearest space; index 0 is never
    # inspected. Without a space the distance is the whole left part.
    left_space = data.rfind(' ', 1, centre + 1)
    back = centre if left_space == -1 else centre - left_space

    # Distance from the centre forward to the nearest space, or to the end
    # of the string when there is none.
    right_space = data.find(' ', centre)
    forward = size - centre if right_space == -1 else right_space - centre

    cut = centre + forward if back > forward else centre - back

    # A cut at 0 means nothing is to be dropped: the tail is the whole input.
    tail_start = cut + 1 if cut else 0
    return data[:cut], data[tail_start:]
class Parser(html.parser.HTMLParser):
    """Collect link targets and the text surrounding each link.

    The whole document is read from ``fh`` and parsed in the constructor.
    Afterwards:

    * ``hrefs`` holds one string per ``<a>`` element found outside
      ``head``/``script``/``style``, in document order ('' when the tag has
      no usable ``href``).
    * ``rendered_data`` holds exactly three strings per link, in order: the
      text before the link, the link text ('<>' when the link has no text)
      and the text after the link. Text between two consecutive links is
      split in two by ``split_at_word_boundry``; the first half becomes the
      "after" text of the earlier link and the second half the "before"
      text of the later one.

    Unbalanced anchor markup (a missing ``</a>``, a nested ``<a>`` or a stray
    ``</a>``) is tolerated so that the three-per-link layout always holds.
    """

    # Elements whose content is never shown.
    _IGNORED_TAGS = ('head', 'script', 'style')

    def __init__(self, fh):
        """Parse all of ``fh.read()``; ``fh`` must return ``str``."""
        super().__init__()
        self._ignorable = 0      # nesting depth inside ignored elements
        self._in_link = False    # True between an <a> and its </a>
        self._data = []          # text fragments collected since last save
        self.rendered_data = []
        self.hrefs = []
        self.feed(fh.read())
        # Flush text the parser may still be holding back (for example a
        # trailing fragment that ends in something looking like an
        # unfinished character reference).
        self.close()
        if self._in_link:
            # The last <a> was never closed: treat the rest as its text.
            self._finish_link()
        # Whatever is left is the text after the final link.
        self._save_data()

    def _save_data(self, split_previous=False):
        """Move the collected text fragments into ``rendered_data``.

        With ``split_previous`` set and at least one earlier entry present,
        the text is split at a word boundary and stored as two entries;
        otherwise it is stored as a single entry.
        """
        data = ' '.join(self._data)
        self._data.clear()
        if split_previous and self.rendered_data:
            self.rendered_data.extend(split_at_word_boundry(data))
        else:
            self.rendered_data.append(data)

    def _finish_link(self):
        """Store the current link's text and leave link state."""
        if not self._data:
            # Placeholder so links without text are still visible.
            self._data.append('<>')
        self._save_data()
        self._in_link = False

    def handle_starttag(self, tag, attrs):
        """Track ignored regions and record every visible ``<a>``."""
        if tag in self._IGNORED_TAGS:
            self._ignorable += 1
        elif not self._ignorable and tag == 'a':
            if self._in_link:
                # Anchors cannot nest; implicitly close the previous one so
                # every link keeps its own three entries.
                self._finish_link()
            self._save_data(True)
            href = ''
            for attr, value in attrs:
                # A valueless attribute (<a href>) is reported as None.
                if attr == 'href' and value is not None:
                    href = value
            # NOTE(review): HTMLParser already replaces character references
            # in attribute values, so this unescapes a second time. Kept as
            # in the original; confirm that this is intended.
            self.hrefs.append(html.unescape(href))
            self._in_link = True

    def handle_endtag(self, tag):
        """Leave ignored regions and finish the current link on ``</a>``."""
        if tag in self._IGNORED_TAGS:
            # Never go below zero: a stray closing tag must not hide the
            # rest of the document.
            self._ignorable = max(self._ignorable - 1, 0)
        elif not self._ignorable and tag == 'a' and self._in_link:
            self._finish_link()

    def handle_data(self, data):
        """Collect visible text with runs of whitespace collapsed."""
        if not self._ignorable:
            data = ' '.join(data.split())
            if data:
                self._data.append(data)
def show_urls(in_fh, width=120):
    """Yield one display entry per link found in the HTML read from ``in_fh``.

    Each entry is a string of two lines: the link text, highlighted with an
    ANSI colour sequence and surrounded by as much neighbouring text as fits
    in ``width`` columns, followed by the link target on the next line.
    Context that does not fit is cut and marked with '...'.

    When the input contains no links a single message is yielded instead.

    Args:
        in_fh: readable text file object containing HTML.
        width: number of columns available for the context line.
    """
    p = Parser(in_fh)
    for i, href in enumerate(p.hrefs):
        # Parser stores three entries per link: before, link text, after.
        rendered_i = i * 3 + 1
        parts = p.rendered_data[rendered_i - 1:rendered_i + 2]
        # Pad a short slice so one malformed link cannot abort the listing.
        parts += [''] * (3 - len(parts))
        pre, mid, post = parts

        # Budget for the leading context: half of what the link text leaves.
        pre_len = (width - len(mid)) // 2 - 1
        if pre_len <= 0:
            # The link text alone fills the line; only hint at the context.
            if pre:
                pre = '...'
            if post:
                post = '...'
        else:
            if len(pre) > pre_len:
                # Keep the tail of the leading text. With a budget too small
                # to hold anything besides the ellipsis, show only that
                # (a non-positive count would otherwise slice from the
                # wrong end or keep the whole string).
                keep = pre_len - 3
                pre = '...' + pre[-keep:] if keep > 0 else '...'
            # The trailing context gets whatever is left after the leading
            # context, the link text and the separating spaces.
            post_len = width - len(pre) - len(mid) - (2 if pre else 1)
            if len(post) > post_len:
                keep = post_len - 3
                post = post[:keep] + '...' if keep > 0 else '...'
        if pre:
            pre += ' '
        if post:
            post = ' ' + post
        # SGR 36;40: cyan text on a black background, reset afterwards.
        text = '%s\x1b[0;36;40m%s\x1b[0m%s' % (pre, mid, post)
        yield '%s\n %s\n' % (text, href)
    if not p.hrefs:
        yield 'No links in input...'
if __name__ == '__main__':
    # Input is stdin when no argument is given, otherwise the file named by
    # the first argument.
    if len(sys.argv) == 1:
        fh = sys.stdin
    elif os.path.exists(sys.argv[1]):
        fh = open(sys.argv[1])
    else:
        print('Pass a single file or stdin data as input.')
        sys.exit(1)
    p = pager()
    width = shutil.get_terminal_size((120, 1)).columns
    try:
        for line in show_urls(fh, width):
            print(line, file=p.stdin)
    except BrokenPipeError:
        # The pager was quit before everything was written; nothing to do.
        pass
    except Exception:
        # Show failures inside the pager, where the user is looking.
        import traceback
        try:
            p.stdin.write(traceback.format_exc())
        except BrokenPipeError:
            pass
    finally:
        if fh is not sys.stdin:
            fh.close()
        try:
            # Closing flushes buffered output, which fails in the same way
            # when the pager is already gone.
            p.stdin.close()
        except BrokenPipeError:
            pass
        p.wait()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment