Last active
May 23, 2019 07:08
-
-
Save virus-warnning/b3b10536bdb4707abd524229ed138838 to your computer and use it in GitHub Desktop.
重複漢字測試
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Unicode duplicate Han character test
#
# Reference: http://maraboy.com/node/83
#
# Expected output:
# 你的 [懶叫] 跟我的 [懶叫] 不一樣
# * 你的 [懶叫] 編碼是: [F90D,53EB]
# * 我的 [懶叫] 編碼是: [61F6,53EB]
# 用 [NFC] 喬一下你的 [懶叫] 和我的 [懶叫]
# 然後, 你的 [懶叫] 跟我的 [懶叫] 一樣
# * 我們的 [懶叫] 編碼是 [61F6,53EB]
#
import unicodedata
def str2hex(s):
    """Return the code points of *s* as comma-separated uppercase hex.

    Each character is rendered with ``'%04X'``, i.e. zero-padded to at
    least four hex digits (code points above U+FFFF simply use more
    digits).  An empty string yields an empty string.

    Example: ``str2hex('AB')`` returns ``'0041,0042'``.
    """
    return ','.join('%04X' % ord(ch) for ch in s)
def cmpstr(s1, s2):
    """Print whether *s1* and *s2* are equal, along with their code points.

    The comparison is a plain ``==`` on the two arguments, so strings that
    look the same on screen but are built from different code points are
    reported as different.

    When equal, one line of code points is printed (for *s1*); otherwise
    the code points of both strings are printed so they can be compared.
    Code points are formatted by ``str2hex``.  Returns ``None``.
    """
    # Compare the arguments themselves, not the module-level variables the
    # caller happens to pass in, so the function works for any pair.
    if s1 == s2:
        print('你的 [%s] 跟我的 [%s] 一樣' % (s1, s2))
        print('* 我們的 [%s] 編碼是 [%s]' % (s1, str2hex(s1)))
    else:
        print('你的 [%s] 跟我的 [%s] 不一樣' % (s1, s2))
        print('* 你的 [%s] 編碼是: [%s]' % (s1, str2hex(s1)))
        print('* 我的 [%s] 編碼是: [%s]' % (s2, str2hex(s2)))
# Two strings that render identically but start with different code points.
# They are written as escapes so that an editor, copy/paste, or any tool
# that normalizes text cannot silently turn them into the same string.
# The values match the code points listed in the expected output in the
# file header: F90D,53EB for "yours" and 61F6,53EB for "mine".
lp_yours = '\uF90D\u53EB'
lp_mine = '\u61F6\u53EB'
cmpstr(lp_yours, lp_mine)

# Normalization form to apply; valid names are NFC, NFKC, NFD and NFKD
# (not "NYKD").
form = 'NFC'
print('用 [%s] 喬一下你的 [%s] 和我的 [%s]' % (form, lp_yours, lp_mine))
lp_yours = unicodedata.normalize(form, lp_yours)
lp_mine = unicodedata.normalize(form, lp_mine)

# Print the prefix without a newline so the comparison result continues
# on the same line.
print('然後, ', end='')
cmpstr(lp_yours, lp_mine)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment