Skip to content

Instantly share code, notes, and snippets.

@benigumocom
Last active May 30, 2023 13:13
Show Gist options
  • Save benigumocom/09c6251defb565c1c4304ad5dc4abcda to your computer and use it in GitHub Desktop.
Save benigumocom/09c6251defb565c1c4304ad5dc4abcda to your computer and use it in GitHub Desktop.
クリップボードにコピーしているテキストのコードポイントや Unicode/UTF-16 エスケープシーケンスを確認するスクリプト 👉 https://android.benigumo.com/20230530/dump-text/
#!/usr/bin/env python
import pyperclip
from regex import regex
def graphemes(text):
    r"""Split *text* into a list of extended grapheme clusters.

    Relies on the ``\X`` escape of the third-party ``regex`` module
    (the standard-library ``re`` module does not support ``\X``).

    Args:
        text: The string to split.

    Returns:
        A list of substrings of *text*, one per match of ``\X``.
    """
    return regex.findall(r'\X', text)
def str_to_hex_codepoints_str(text):
    """Return the code points of *text* as space-separated hex literals.

    Each code point is rendered as ``0x`` followed by its upper-case
    hexadecimal value with no zero padding, e.g. ``'AB'`` -> ``'0x41 0x42'``.
    An empty string yields an empty string.

    Args:
        text: The string whose code points are listed.

    Returns:
        A single string of ``0x...`` tokens joined by one space.
    """
    return ' '.join(f'0x{ord(c):X}' for c in text)
def str_to_escape_sequence_unicode(text):
    r"""Return *text* as a run of ``\UXXXXXXXX`` escape sequences.

    Every code point becomes one ``\U`` escape with eight upper-case hex
    digits, e.g. ``'A'`` -> ``\U00000041``.

    The code points are read directly from the string: splitting into
    grapheme clusters first and then flattening them again produces the
    same sequence, so that extra step is not needed here.

    Args:
        text: The string to convert.

    Returns:
        The concatenated escape sequences (empty for an empty string).
    """
    return ''.join(r'\U{:08X}'.format(ord(c)) for c in text)
def str_to_escape_sequence_utf16(text):
    r"""Return *text* as a run of ``\uXXXX`` UTF-16 escape sequences.

    Code points up to U+FFFF become a single ``\uXXXX`` escape. Code points
    from U+10000 upward are split into a high/low surrogate pair, each
    written as its own ``\uXXXX`` escape, e.g. U+1F525 -> ``\uD83D\uDD25``.

    Args:
        text: The string to convert.

    Returns:
        The concatenated escape sequences (empty for an empty string).
    """
    units = []
    for c in text:
        cp = ord(c)
        # U+10000 itself is the first supplementary code point, so the
        # comparison must be inclusive; otherwise it would be emitted as the
        # five-digit, invalid escape "\u10000".
        if cp >= 0x10000:
            offset = cp - 0x10000
            units.append(0xD800 + (offset >> 10))    # high (lead) surrogate
            units.append(0xDC00 + (offset & 0x3FF))  # low (trail) surrogate
        else:
            units.append(cp)
    return ''.join(r'\u{:04X}'.format(unit) for unit in units)
def dump(text):
    """Print a code point / escape-sequence breakdown of *text* to stdout.

    Output consists of a summary for the whole string followed by one
    section per grapheme cluster:

    * Summary: the text, the number of grapheme clusters, the number of
      code points (``len(text)``), then the Unicode escapes, UTF-16
      escapes and hex code points of the whole string, and a blank line.
    * Per grapheme cluster: the cluster, its three encodings, one
      indented line per code point in the cluster, and a blank line.

    Args:
        text: The string to inspect.
    """
    clusters = graphemes(text)

    print(text)
    print(len(clusters))
    print(len(text))
    print(str_to_escape_sequence_unicode(text))
    print(str_to_escape_sequence_utf16(text))
    print(str_to_hex_codepoints_str(text))
    print()

    # clusters (list of str) -> cluster (str) -> c (single code point)
    for cluster in clusters:
        print(cluster)
        print(str_to_escape_sequence_unicode(cluster))
        print(str_to_escape_sequence_utf16(cluster))
        print(str_to_hex_codepoints_str(cluster))
        for c in cluster:
            uni = str_to_escape_sequence_unicode(c)
            u16 = str_to_escape_sequence_utf16(c)
            hcp = str_to_hex_codepoints_str(c)
            print(' ', uni, u16, hcp, f'({c})')
        # Blank line separates one grapheme section from the next.
        print()
def main():
    """Dump the text currently on the clipboard.

    The clipboard content is read with ``pyperclip.paste()`` and stripped
    of leading and trailing whitespace before being passed to ``dump``.
    """
    text = pyperclip.paste().strip()
    # Sample input for manual testing without the clipboard:
    # text = '🫶💀🔥🥹🫛👨‍👩‍👧‍👦'
    dump(text)
# Run the clipboard dump only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment