Skip to content

Instantly share code, notes, and snippets.

@kugland
Last active January 17, 2022 02:17
Show Gist options
  • Save kugland/ea350d2975c329f9c67a8141ca92d376 to your computer and use it in GitHub Desktop.
Save kugland/ea350d2975c329f9c67a8141ca92d376 to your computer and use it in GitHub Desktop.
Python script that prints data about a Unicode character (using the standard-library unicodedata module)
#!/usr/bin/env python3
import re
import sys
import argparse
import unicodedata
# Show the help text when the script is invoked without any arguments.
if len(sys.argv) == 1:
    sys.argv.append('-h')

# Usage:
#   unicodedata <CHAR>
#   unicodedata U+<HEX>
argparser = argparse.ArgumentParser(description='Print unicode data for a character')
argparser.add_argument('char', metavar='CHAR', type=str, nargs='?',
                       help='character or codepoint (U+XXXX)')
options = argparser.parse_args()

# nargs='?' leaves options.char as None when no positional is supplied
# (for example when invoked as `unicodedata --`); report a usage error
# instead of letting re.match() fail with a TypeError.
if options.char is None:
    argparser.error('missing CHAR argument')

# Raw string: '\+' in a plain string literal is an invalid escape sequence.
if re.match(r'^[Uu]\+[0-9A-Fa-f]+$', options.char):
    try:
        options.char = chr(int(options.char[2:], 16))
    except (ValueError, OverflowError):
        # chr() rejects values outside range(0x110000).
        argparser.error(f'codepoint out of range: {options.char}')

# ord() further down only accepts a single character, so reject empty or
# multi-character input here with a readable message.
if len(options.char) != 1:
    argparser.error('CHAR must be a single character or a U+XXXX codepoint')

char = options.char
def format_codepoint(code: int) -> str:
    """Return *code* in ``U+`` notation as uppercase hexadecimal.

    Values below 0x10000 are zero-padded to four hex digits; all other
    values are zero-padded to six hex digits.
    """
    width = 4 if code < 0x10000 else 6
    return f'U+{code:0{width}X}'
# Print the report for `char`, one "Label: value" line per property.
print(f'Character: {char}')
print(f'Codepoint: {format_codepoint(ord(char))}')

# unicodedata.name() raises ValueError when the character has no name.
try:
    print(f'Name: {unicodedata.name(char)}')
except ValueError:
    print('Name: <not found>')

print(f'Category: {unicodedata.category(char)}')

# The bidirectional class is an empty string when none is defined.
bidi_class = unicodedata.bidirectional(char)
if bidi_class.strip():
    print(f'Bidirectional: {bidi_class}')

print(f'Combining class: {unicodedata.combining(char)}')

# The decomposition mapping is an empty string when none is defined.
decomposition = unicodedata.decomposition(char)
if decomposition.strip():
    print(f'Decomposition: {decomposition}')

# Pass a default instead of catching exceptions: a bare `except:` would
# also swallow KeyboardInterrupt and SystemExit.
decimal_value = unicodedata.decimal(char, None)
if decimal_value is not None:
    print(f'Decimal digit: {decimal_value}')

digit_value = unicodedata.digit(char, None)
if digit_value is not None:
    print(f'Digit: {digit_value}')

# Encoded forms, shown as space-separated uppercase hex bytes.
print(f'UTF-8: {" ".join(f"{b:02X}" for b in char.encode("utf-8"))}')
print(f'UTF-16 LE: {" ".join(f"{b:02X}" for b in char.encode("utf-16-le"))}')
print(f'UTF-16 BE: {" ".join(f"{b:02X}" for b in char.encode("utf-16-be"))}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment