# Source: GitHub gist tkf/4cf4f4854b4ccca0dd184b81383341ba
# Author: @tkf
# Created: February 7, 2018 10:35
"""
Dump unicode character-name pairs.
See:
- https://docs.python.org/2/library/functions.html#unichr
- https://docs.python.org/2/library/unicodedata.html
"""
from __future__ import print_function
import os
# Python 2 compatibility: expose ``unichr``/``xrange`` under their Python 3
# names.  On Python 3 those names do not exist, the lookup raises NameError,
# and the builtin ``chr``/``range`` are left untouched.
try:
    chr, range = unichr, xrange
except NameError:
    pass
# Highest code point defined by Unicode.
LAST_UNICODE = 0x10FFFF


def unicodes(first=0, last=LAST_UNICODE):
    """
    Yield every character whose code point lies in ``[first, last]``.

    Both bounds are inclusive.  The requested range is clamped to the code
    points the running interpreter can represent (``0`` through
    ``sys.maxunicode``), so out-of-range bounds shorten the output instead
    of raising.  An empty range (``last < first``) yields nothing.

    >>> str(''.join(unicodes(ord('a'), ord('c'))))
    'abc'
    >>> len(list(unicodes(-2, 1)))
    2
    """
    import sys

    # Clamp up front instead of relying on chr() raising ValueError: a
    # negative `first` would otherwise end the generator before anything
    # is yielded, and exceptions would be doing the job of a loop bound.
    start = max(first, 0)
    stop = min(last, sys.maxunicode)
    for code_point in range(start, stop + 1):
        yield chr(code_point)
def genunicodedata(first=0, last=LAST_UNICODE):
    """
    Yield ``(character, name)`` pairs for the named characters in a range.

    `first` and `last` are inclusive code points and are passed straight to
    `unicodes`.  Characters for which `unicodedata.name` knows no name are
    skipped.
    """
    from unicodedata import name

    for u in unicodes(first, last):
        # Ask for a default instead of catching ValueError: a wide range can
        # contain many unnamed code points, and this avoids raising one
        # exception per such character.  It also keeps the `yield` outside
        # any `try`, so a ValueError thrown into the generator by the
        # consumer is not silently swallowed.
        char_name = name(u, None)
        if char_name is not None:
            yield u, char_name
def dump_all_unicodes(sep, output, **kwds):
    """
    Write one ``<character><sep><name>`` line per named character.

    `output` is a writable stream.  When it has no ``encoding`` attribute,
    or that attribute is ``None``, it is wrapped in a UTF-8 codecs writer
    first.  The stream is used as a context manager for the duration of
    the dump.  Remaining keyword arguments are forwarded to
    `genunicodedata`.
    """
    lacks_encoding = getattr(output, 'encoding', None) is None
    if lacks_encoding:
        import codecs
        utf8_writer = codecs.getwriter('UTF-8')
        output = utf8_writer(output)
    # NOTE(review): a stream that already reports an encoding is used as-is;
    # presumably a non-UTF-8 encoding could fail on some characters -- confirm.
    with output:
        for char, char_name in genunicodedata(**kwds):
            print(char, char_name, sep=sep, file=output)
def make_parser(doc=__doc__):
    """
    Build the command-line argument parser.

    `doc` becomes the parser description (the module docstring by default).
    The parser defines ``--first`` and ``--last`` (inclusive integer code
    points), ``--sep`` (text placed between a character and its name) and
    an optional positional ``output`` file, where ``-`` means stdout.
    """
    import argparse

    class FormatterClass(argparse.RawDescriptionHelpFormatter,
                         argparse.ArgumentDefaultsHelpFormatter):
        """Keep the description's line breaks and show defaults in help."""

    try:
        # Always write the dump file as UTF-8: the locale's default encoding
        # may be unable to represent many of the characters being dumped.
        output_type = argparse.FileType('w', encoding='UTF-8')
    except TypeError:
        # Python 2's FileType has no `encoding` parameter.
        output_type = argparse.FileType('w')

    parser = argparse.ArgumentParser(
        formatter_class=FormatterClass,
        description=doc)
    parser.add_argument('--first', default=ord('~') + 1, type=int,
                        help='(inclusive)')
    parser.add_argument('--last', default=LAST_UNICODE, type=int,
                        help='(inclusive)')
    parser.add_argument('--sep', default=': ',
                        help='separate unicode and its name')
    parser.add_argument('output', nargs='?', type=output_type,
                        default=os.path.expanduser('~/var/unicodes'),
                        help='"-" means stdout.')
    return parser
def main(args=None):
    """
    Command-line entry point: parse `args` and run the dump.

    `args` is handed to ``parse_args`` unchanged; every parsed option is
    then passed to `dump_all_unicodes` as a keyword argument.
    """
    options = vars(make_parser().parse_args(args))
    dump_all_unicodes(**options)
# Run the dump only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# (GitHub page footer: sign-up / sign-in prompt for commenting on the gist.)