Skip to content

Instantly share code, notes, and snippets.

@ctrlcctrlv
Created June 4, 2019 09:34
Show Gist options
  • Select an option

  • Save ctrlcctrlv/9cbfd9863cf415548e3fe4c09597a596 to your computer and use it in GitHub Desktop.

Select an option

Save ctrlcctrlv/9cbfd9863cf415548e3fe4c09597a596 to your computer and use it in GitHub Desktop.
Command line programs for working with HTML5 character entities.
#!/usr/bin/env python3
import html
import sys
# Filter: replace HTML character references in each input line with the
# characters they stand for and echo the result to stdout.
if not sys.stdin.isatty():
    # Piped/redirected input: each line still carries its own terminator,
    # so it is written back without adding one.
    for raw_line in sys.stdin:
        sys.stdout.write(html.unescape(raw_line))
else:
    # Interactive terminal: input() strips the newline, so add it back.
    while True:
        try:
            typed = input()
        except EOFError:
            # End of input (e.g. Ctrl-D) ends the session quietly.
            break
        sys.stdout.write(html.unescape(typed) + '\n')
#!/usr/bin/env python3
import html.entities as he
import sys
# Reverse lookup table built from he.html5: replacement text -> list of
# every entity name that produces it, each prefixed with '&'. Whether a
# name ends in ';' is determined by the keys of he.html5.
html5_r = {}
for entity_name, replacement in he.html5.items():
    html5_r.setdefault(replacement, []).append('&' + entity_name)
def find(uni):
    """Return the list of entity spellings for the character *uni*.

    If *uni* is a key of ``html5_r`` its list of named references is
    returned as stored. Otherwise the result is a one-element list holding
    a hexadecimal numeric character reference built from ``ord(uni)``.
    """
    if uni in html5_r:
        return html5_r[uni]
    # No named entity known: fall back to "&#x<HEX>;" with uppercase digits.
    return [f"&#x{ord(uni):X};"]
def _describe(text):
    """Return one output line listing the entity spellings of *text*.

    Alternative spellings of one character are joined with ' or '; the
    per-character groups are joined with ' followed by '. An empty *text*
    yields an empty string.
    """
    return ' followed by '.join(' or '.join(find(ch)) for ch in text)


# For every input line, print the entity spellings of each character.
if sys.stdin.isatty():
    # Interactive terminal: input() already strips the trailing newline.
    try:
        while True:
            sys.stdout.write(_describe(input()) + '\n')
    except EOFError:
        # End of input (e.g. Ctrl-D) ends the session quietly.
        pass
else:
    # Piped/redirected input. This branch previously called
    # html.unescape(), which is the wrong operation for this script and
    # also a NameError here: `import html.entities as he` binds only `he`.
    # It now produces the same listing as the interactive branch.
    for line in sys.stdin:
        # Drop the line terminator so it is not reported as a character.
        sys.stdout.write(_describe(line.rstrip('\n')) + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment