Created
March 21, 2015 02:42
-
-
Save kitsuyui/1e136a2846e0094c28b2 to your computer and use it in GitHub Desktop.
その透明な文字に混じらず、見つけ出すんだ。 ref: http://qiita.com/kitsuyui/items/12db383f5e5971f32b08
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
U+20 SPACE | |
U+A0 NO-BREAK SPACE | |
U+2000 EN QUAD | |
U+2001 EM QUAD | |
U+2002 EN SPACE | |
U+2003 EM SPACE | |
U+2004 THREE-PER-EM SPACE | |
U+2005 FOUR-PER-EM SPACE | |
U+2006 SIX-PER-EM SPACE | |
U+2007 FIGURE SPACE | |
U+2008 PUNCTUATION SPACE | |
U+2009 THIN SPACE | |
U+200A HAIR SPACE | |
U+205F MEDIUM MATHEMATICAL SPACE | |
U+3000 IDEOGRAPHIC SPACE | |
U+FFA0 HALFWIDTH HANGUL FILLER |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
U+FFA0 HALFWIDTH HANGUL FILLER |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
U+3164 HANGUL FILLER |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unicodedata | |
from PIL import Image | |
from PIL import ImageDraw | |
from PIL import ImageFont | |
# Font shared by every rendering in this script. It is loaded once, at import
# time, from a path relative to the current working directory.
FONT = ImageFont.truetype('./unifont-7.0.06.ttf', 32)
def main():
    """Print one line per invisible character: its code point and its name.

    Each printed line is ``U+<HEX>``, a tab, and the character's Unicode
    name. The name is left empty for characters that ``unicodedata`` has no
    name for.
    """
    # Verify the rendering comparison behaves as expected before relying on it.
    assert_principles()
    for invisible in invisible_characters():
        # Passing a default avoids the ValueError raised for unnamed characters.
        label = unicodedata.name(invisible, '')
        print('U+{:X}\t{}'.format(ord(invisible), label))
def invisible_characters():
    """Yield only the characters that are rendered as blank.

    A character counts as blank when drawing it produces exactly the same
    image bytes as drawing a single space.
    """
    blank_bytes = letter_draw_bytes(' ')
    for candidate in unicode_characters():
        if letter_draw_bytes(candidate) != blank_bytes:
            continue
        yield candidate
def unicode_characters():
    """Yield every Unicode code point as a one-character string, skipping PUA.

    The Private Use Areas are excluded:

    * U+E000..U+F8FF      (Private Use Area in the BMP)
    * U+F0000..U+FFFFD    (Supplementary Private Use Area-A)
    * U+100000..U+10FFFD  (Supplementary Private Use Area-B)

    Every other code point from U+0000 through U+10FFFF is yielded in
    ascending order. That includes the surrogate range U+D800..U+DFFF
    (yielded as lone-surrogate strings) and the two code points that follow
    each supplementary PUA (U+FFFFE, U+FFFFF, U+10FFFE, U+10FFFF).
    """
    kept_ranges = (
        range(0x0000, 0xE000),
        range(0xF8FF + 1, 0xF0000),
        range(0xFFFFD + 1, 0x100000),
        # range() excludes its end bound, so the bound must be one past
        # U+10FFFF; ending at 0x10FFFF would silently drop the last code point.
        range(0x10FFFD + 1, 0x10FFFF + 1),
    )
    for codepoints in kept_ranges:
        for codepoint in codepoints:
            yield chr(codepoint)
def letter_draw_bytes(char):
    """Return the raw image bytes obtained by drawing ``char`` on a blank image.

    The text is drawn in black at the top-left corner of a white 32x32 RGB
    image using the module-level ``FONT``.
    """
    white = (255, 255, 255)
    canvas = Image.new('RGB', (32, 32), white)
    ImageDraw.Draw(canvas).text((0, 0), char, font=FONT, fill='#000')
    return canvas.tobytes()
def assert_principles():
    """Check, on a few examples, that the principles below hold."""
    render = letter_draw_bytes
    # The same character always yields the same bytes.
    assert render('あ') == render('あ')
    # Spaces yield the same bytes as the empty string.
    assert render('') == render(' ') == render(' ')
    # Characters that look different yield different bytes.
    assert render('あ') != render('い')
# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
U+9 | |
U+A | |
U+B | |
U+C | |
U+D | |
U+20 SPACE | |
U+85 | |
U+A0 NO-BREAK SPACE | |
U+2000 EN QUAD | |
U+2001 EM QUAD | |
U+2002 EN SPACE | |
U+2003 EM SPACE | |
U+2004 THREE-PER-EM SPACE | |
U+2005 FOUR-PER-EM SPACE | |
U+2006 SIX-PER-EM SPACE | |
U+2007 FIGURE SPACE | |
U+2008 PUNCTUATION SPACE | |
U+2009 THIN SPACE | |
U+200A HAIR SPACE | |
U+2028 LINE SEPARATOR | |
U+2029 PARAGRAPH SEPARATOR | |
U+202F NARROW NO-BREAK SPACE | |
U+205F MEDIUM MATHEMATICAL SPACE | |
U+3000 IDEOGRAPHIC SPACE | |
U+180E MONGOLIAN VOWEL SEPARATOR | |
U+200B ZERO WIDTH SPACE | |
U+200C ZERO WIDTH NON-JOINER | |
U+200D ZERO WIDTH JOINER | |
U+2060 WORD JOINER | |
U+2061 FUNCTION APPLICATION | |
U+2062 INVISIBLE TIMES | |
U+2063 INVISIBLE SEPARATOR | |
U+2064 INVISIBLE PLUS | |
U+3164 HANGUL FILLER | |
U+FFA0 HALFWIDTH HANGUL FILLER | |
U+FEFF ZERO WIDTH NO-BREAK SPACE |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AB |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A{U+2060}B |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment