Last active
October 23, 2020 07:45
-
-
Save kzinmr/38ebcb7d4d0fb76917441b4d35f7badc to your computer and use it in GitHub Desktop.
日本語Unicode文字列をサンプリング https://stackoverflow.com/questions/1477294/generate-random-utf-8-string-in-python https://stackoverflow.com/questions/19899554/unicode-range-for-japanese
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
def get_random_unicode(length, include_ranges=None):
    """Return a random string of Japanese characters.

    The default character ranges are a deliberately arbitrary selection
    based on the table at
    http://www.rikai.com/library/kanjitables/kanji_codes.unicode.shtml

    Args:
        length: Number of characters to generate. Zero or a negative
            value yields an empty string.
        include_ranges: Optional iterable of ``(first, last)`` code point
            pairs, both ends inclusive, to sample from. When omitted, the
            default ranges defined below are used. Pass e.g.
            ``[(0x4e00, 0x9faf)]`` to sample kanji instead.

    Returns:
        A string of ``length`` characters, each picked independently with
        ``random.choice`` from the concatenation of all ranges.

    Raises:
        ValueError: If ``include_ranges`` contains no code points.
    """
    # Python 2 spells the code-point-to-character builtin ``unichr``.
    try:
        get_char = unichr
    except NameError:
        get_char = chr

    if include_ranges is None:
        include_ranges = (
            (0x3000, 0x3010),  # Japanese-style punctuation
            (0x3041, 0x3093),  # Hiragana
            (0x30a1, 0x30fc),  # Katakana
            # Full-width digits and Latin letters plus half-width
            # punctuation; half-width katakana only starts at 0xff66.
            (0xff10, 0xff65),
            # Not sampled by default:
            #   (0x4e00, 0x9faf)  CJK unified ideographs - common and
            #                     uncommon kanji
            #   (0x3400, 0x4dbf)  CJK unified ideographs Extension A -
            #                     rare kanji
        )

    alphabet = [
        get_char(code_point)
        for first, last in include_ranges
        for code_point in range(first, last + 1)  # +1: ``last`` is inclusive
    ]
    if not alphabet:
        # random.choice would otherwise fail with an opaque IndexError.
        raise ValueError("include_ranges must contain at least one code point")

    return ''.join(random.choice(alphabet) for _ in range(length))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment