Created
July 20, 2014 11:35
-
-
Save lsfalimis/690e6ddb4f04ecdbf685 to your computer and use it in GitHub Desktop.
Sort Chinese words by pinyin. Use through Keyboard Maestro; see http://i.imgur.com/yAFq01d.png
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys | |
from pypinyin import pinyin | |
import pypinyin | |
def getPinyin(x):
    """Return the pinyin syllables of *x* as a flat list of UTF-8 byte strings.

    x may be a UTF-8 encoded byte string (the original contract) or an
    already-decoded text string.  Each character is romanised with
    pypinyin's TONE2 style, and the per-character result lists returned by
    pinyin() are concatenated in order into one flat list.

    The returned list is used as a sort key: comparing two such lists
    compares the words syllable by syllable.
    """
    # Decode only byte strings; calling unicode(x, 'utf-8') on text that is
    # already decoded raises TypeError.
    if isinstance(x, bytes):
        x = x.decode('utf-8')
    syllables = []
    for readings in pinyin(x, style=pypinyin.TONE2):
        # pinyin() yields one list per character; flatten in place instead
        # of rebuilding the list with `+` on every iteration.
        syllables.extend(readings)
    return [s.encode('utf-8') for s in syllables]
# The words to sort arrive as a single whitespace-separated command-line
# argument; with no argument there is simply nothing to sort.
lst = sys.argv[1].split() if len(sys.argv) > 1 else []
# Sample input for manual testing:
# lst = ['值是','不的','啊不成','啊','啊不','啊成']
# sorted() is stable, exactly like the adjacent-swap bubble sort it replaces
# (which only swapped on a strict ">"), so the resulting order is the same,
# but each word's pinyin key is computed once instead of on every comparison.
lst = sorted(lst, key=getPinyin)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(' '.join(lst))
# Author's note: I am a liberal-arts student, so please go easy on me; if this
# can be simplified, please let me know. Thanks :)
# coding: utf-8
import sys
# Python 2 workaround: force the interpreter-wide default encoding to UTF-8.
# Python 3 always reports "utf-8" here, so this branch is skipped there.
# NOTE(review): words are decoded explicitly in enc_word() below, so this
# override may be unnecessary - confirm before removing it.
if sys.getdefaultencoding() != "utf-8":
    reload(sys)  # makes sys.setdefaultencoding available again on Python 2
    sys.setdefaultencoding("utf-8")
from pypinyin import pinyin, Style

lst = ['值是','不的','啊不成','啊','啊不','啊成']
# TONE2 style, with heteronym=True so every reading pypinyin offers for a
# character is included in the key.
opts = dict(style=Style.TONE2, heteronym=True)


def enc_word(w):
    """Return the pinyin sort key for the word *w*.

    w may be a UTF-8 encoded byte string or an already-decoded text string.
    The key is whatever pinyin() returns for the decoded word with `opts`:
    one list of readings per character.
    """
    # Decode only byte strings; text strings are passed through untouched.
    if isinstance(w, bytes):
        w = w.decode('utf-8')
    return pinyin(w, **opts)


# Print the sample words one per line, ordered by their pinyin keys.
for w in sorted(lst, key=enc_word):
    print(w)
这个库应该能解决楼主的问题:https://github.com/bmxbmx3/cn_sort
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
有个投机取巧的办法,把拼音连接成一个字符串作为排序的key即可