Skip to content

Instantly share code, notes, and snippets.

@cgoldberg
Created August 20, 2025 14:46
Show Gist options
  • Save cgoldberg/9cdd523accafeb9493986c4b51475638 to your computer and use it in GitHub Desktop.
Save cgoldberg/9cdd523accafeb9493986c4b51475638 to your computer and use it in GitHub Desktop.
Python - some handy functions for generating Unicode data
#!/usr/bin/env python
#
# Functions for generating Unicode data
import random
import unicodedata
def get_all_chars(last_code_point=0xFFFF):
    """Returns a tuple of one-character strings, one per code point,
    covering U+0000 through ``last_code_point`` inclusive.

    By default only the Basic Multilingual Plane (U+0000 through U+FFFF)
    is returned, not every Unicode character. Pass
    ``last_code_point=0x10FFFF`` to cover the whole range that ``chr``
    accepts.

    Nothing is filtered out: the result contains whatever ``chr`` yields
    for each code point, including surrogates and unassigned code points.
    ``chr`` raises ``ValueError`` if ``last_code_point`` is above 0x10FFFF.
    """
    # range() excludes its stop value, hence the + 1 to keep the end inclusive.
    return tuple(map(chr, range(last_code_point + 1)))
def get_printable_chars(chars):
    """Returns a tuple holding only the printable items of ``chars``.

    Each item is kept when its ``isprintable()`` method returns true,
    i.e. when ``repr()`` can show it without resorting to an escape
    sequence. The order of the input is preserved.
    """
    printable = []
    for candidate in chars:
        if candidate.isprintable():
            printable.append(candidate)
    return tuple(printable)
def get_whitespace_chars(chars):
    """Returns a tuple holding only the whitespace items of ``chars``.

    Each item is kept when its ``isspace()`` method returns true. For a
    single character that means the Unicode character database gives it
    general category Zs ('Separator, space') or a bidirectional class of
    WS, B, or S. The order of the input is preserved.
    """
    spaces = []
    for candidate in chars:
        if candidate.isspace():
            spaces.append(candidate)
    return tuple(spaces)
def get_string_chars(chars):
    """Returns a tuple with the ``repr()`` of every item in ``chars``,
    in the same order as the input.
    """
    return tuple(map(repr, chars))
def print_name_table(chars, width=30):
    """Prints a table of Unicode names and string representations
    from a sequence of characters.

    ``width`` is the minimum width of the name column; longer names are
    not truncated. Each name is looked up with ``unicodedata.name`` and
    title-cased; characters without a name are listed as "Undefined".
    The character column shows ``repr()`` of each character.
    """
    char_heading = "Character"
    print(f"{'Name':{width}}{char_heading}")
    # Size the rule from the actual header so it still lines up when the
    # caller passes a non-default width (39 dashes for the default of 30).
    print("-" * (width + len(char_heading)))
    for char in chars:
        name = unicodedata.name(char, "Undefined").title()
        print(f"{name:{width}}{char!r}")
def generate_random_string(chars, length):
    """Returns a string built by joining ``length`` items drawn at random
    from ``chars``.

    The draw uses ``random.choices``, so items are picked with
    replacement and may repeat.
    """
    picks = random.choices(chars, k=length)
    separator = ""
    return separator.join(picks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment