Created
August 17, 2011 06:34
-
-
Save blackbing/1150937 to your computer and use it in GitHub Desktop.
為了解決中文筆劃排序的問題(只適用繁體中文)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 a440 a441 | |
2 a442 a453 | |
3 a454 a47e | |
4 a4a1 a4fd | |
5 a4fe a5df | |
6 a5e0 a6e9 | |
7 a6ea a8c2 | |
8 a8c3 ab44 | |
9 ab45 adbb | |
10 adbc b0ad | |
11 b0ae b3c2 | |
12 b3c3 b6c3 | |
13 b6c4 b9ab | |
14 b9ac bbf4 | |
15 bbf5 bea6 | |
16 bea7 c074 | |
17 c075 c24e | |
18 c24f c35e | |
19 c35f c454 | |
20 c455 c4d6 | |
21 c4d7 c56a | |
22 c56b c5c7 | |
23 c5c8 c5f0 | |
24 c5f1 c654 | |
25 c655 c664 | |
26 c665 c66b | |
27 c66c c675 | |
28 c676 c67a | |
29 c67b c67e | |
2 c940 c944 | |
3 c945 c94c | |
4 c94d c95c | |
5 c95d c9aa | |
6 c9ab ca59 | |
7 ca5a cbb0 | |
8 cbb1 cddc | |
9 cddd d0c7 | |
10 d0c8 d44a | |
11 d44b d850 | |
12 d851 dcb0 | |
13 dcb1 e0ef | |
14 e0f0 e4e5 | |
15 e4e6 e8f3 | |
16 e8f4 ecb8 | |
17 ecb9 efb6 | |
18 efb7 f1ea | |
19 f1eb f3fc | |
20 f3fd f5bf | |
21 f5c0 f6d5 | |
22 f6d6 f7cf | |
23 f7d0 f8a4 | |
24 f8a5 f8ed | |
25 f8ee f96a | |
26 f96b f9a1 | |
27 f9a2 f9b9 | |
28 f9ba f9c5 | |
29 f9c6 f9dc | |
9 f9da f9da | |
12 f9db f9db | |
13 f9d6 f9d8 | |
15 f9dc f9dc | |
16 f9d9 f9d9 | |
30 c67b c67d | |
30 f9cc f9cf | |
31 f9c6 f9c6 | |
31 f9d0 f9d0 | |
32 f9d1 f9d1 | |
33 c67e c67e | |
33 f9d2 f9d2 | |
34 f9d3 f9d3 | |
36 f9d4 f9d5 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
 * Author: [email protected]
 * Desc: Sorts Chinese strings by stroke count (Traditional Chinese only).
 * From PHP, call cht_strokesort() directly.
 * HTTP API: http://localhost/cht_strokesort.php?compare=%E9%A9%A2%E5%AD%90,%E5%8F%B2%E7%91%9E%E5%85%8B,%E7%99%BD%E9%9B%AA%E5%85%AC%E4%B8%BB,%E9%95%B7%E9%9D%B4%E8%B2%93,%E5%B0%8F%E6%9C%A8%E5%81%B6,%E8%96%91%E9%A4%85%E4%BA%BA,%E4%B8%80,%E4%BA%8C,%E4%B8%89,a,b,c,1,2,5,%E4%B8%83&callback=test&dont_sort=true
 * Note: parameters must be UTF-8 and passed through encodeURIComponent.
 */
// Byte ranges that make up a two-byte Big5 character.
define('BIG5_HB_MIN', 0x81);  // lead (high) byte: lowest value
define('BIG5_HB_MAX', 0xfe);  // lead (high) byte: highest value
define('BIG5_LB1_MIN', 0x40); // trail (low) byte, first range: lowest value
define('BIG5_LB1_MAX', 0x7e); // trail (low) byte, first range: highest value
define('BIG5_LB2_MIN', 0xa1); // trail (low) byte, second range: lowest value
define('BIG5_LB2_MAX', 0xfe); // trail (low) byte, second range: highest value
/**
 * Tell whether a byte can be the lead (high) byte of a Big5 character.
 *
 * @param string $c Byte to test; only its first byte is inspected.
 * @return bool True when the byte lies within BIG5_HB_MIN..BIG5_HB_MAX.
 */
function big5_isHB($c) {
    // substr() on a short string yields "" (or false before PHP 8);
    // neither can be a lead byte.
    if (!is_string($c) || $c === '') {
        return false;
    }
    $byte = ord($c);
    return $byte >= BIG5_HB_MIN && $byte <= BIG5_HB_MAX;
}
/**
 * Tell whether a byte can be the trail (low) byte of a Big5 character.
 *
 * @param string $c Byte to test; only its first byte is inspected.
 * @return bool True when the byte lies within BIG5_LB1_MIN..BIG5_LB1_MAX
 *              or BIG5_LB2_MIN..BIG5_LB2_MAX.
 */
function big5_isLB($c) {
    // substr() on a short string yields "" (or false before PHP 8);
    // neither can be a trail byte.
    if (!is_string($c) || $c === '') {
        return false;
    }
    $byte = ord($c);
    $inFirstRange = $byte >= BIG5_LB1_MIN && $byte <= BIG5_LB1_MAX;
    $inSecondRange = $byte >= BIG5_LB2_MIN && $byte <= BIG5_LB2_MAX;
    return $inFirstRange || $inSecondRange;
}
/**
 * Convert a UTF-8 string to Big5, one character at a time.
 *
 * ASCII bytes are copied through unchanged. Each multi-byte UTF-8 sequence is
 * converted with iconv(); when iconv() cannot convert it, the character is
 * emitted as an HTML entity instead. Bytes that are not a valid UTF-8 lead
 * byte (0x80-0xBF, 0xF8-0xFF) are dropped.
 *
 * @param string $utf8_str UTF-8 encoded input.
 * @return string Big5 bytes, with HTML entities for unconvertible characters.
 */
function utf8_2_big5($utf8_str) {
    $utf8_str = (string) $utf8_str;
    $len = strlen($utf8_str);
    $big5_str = "";
    for ($i = 0; $i < $len; $i++) {
        $lead = ord($utf8_str[$i]);
        if ($lead < 128) {
            $big5_str .= $utf8_str[$i];
            continue;
        }

        // Length of the UTF-8 sequence, derived from its lead byte.
        if ($lead > 191 && $lead < 224) {
            $seqLen = 2;
        } else if ($lead > 223 && $lead < 240) {
            $seqLen = 3;
        } else if ($lead > 239 && $lead < 248) {
            $seqLen = 4;
        } else {
            // Stray continuation byte or invalid lead byte: skipped.
            continue;
        }

        $char = substr($utf8_str, $i, $seqLen);
        $new_word = iconv("UTF-8", "Big5", $char);
        if ($new_word === false || $new_word === "") {
            // The fallback must cover exactly this character. Using a fixed
            // 3 bytes would swallow the byte after a 2-byte sequence (which is
            // then emitted again) and truncate a 4-byte sequence.
            $new_word = mb_convert_encoding($char, 'HTML-ENTITIES', 'UTF-8');
        }
        $big5_str .= $new_word;
        $i += $seqLen - 1; // the loop's own $i++ consumes the lead byte
    }
    return $big5_str;
}
/**
 * Look up the stroke count of a Big5-encoded Chinese character.
 *
 * Only the first two bytes of $str are examined. The lookup table is read
 * from ./big5_stroke.tab (relative to the current working directory); each
 * usable line holds "<strokes> <start-hex> <end-hex>" and the first range
 * containing the character wins. The parsed table is kept for later calls.
 *
 * If the table cannot be read (or is empty) an error message is printed and
 * the script exits.
 *
 * @param string $str Big5 bytes.
 * @return string|false Stroke count exactly as written in the table, or false
 *                      when the first two bytes are not a Big5 lead/trail pair
 *                      or no range contains them.
 */
function big5_stroke($str)
{
    // Parsed table, cached so sorting N strings does not re-read the file N times.
    static $strokeMapping = null;

    if ($strokeMapping === null) {
        $path = "./big5_stroke.tab";
        $lines = is_readable($path) ? file($path) : false;
        if (!$lines) {
            // NOTE(review): BIG5_FILE_DIR is not defined anywhere in this file;
            // the message is left untouched in case something matches on it.
            echo "Can't Open file big5_stroke.tab, plz check define BIG5_FILE_DIR is valid";
            exit;
        }

        $strokeMapping = array();
        foreach ($lines as $line) {
            $fields = preg_split('/\s+/', trim($line));
            // Skip blank or malformed lines rather than feeding junk to hexdec().
            if (count($fields) < 3
                || !preg_match('/^[0-9a-fA-F]+$/', $fields[1])
                || !preg_match('/^[0-9a-fA-F]+$/', $fields[2])) {
                continue;
            }
            $strokeMapping[] = array($fields[0], hexdec($fields[1]), hexdec($fields[2]));
        }
    }

    $s1 = substr($str, 0, 1);
    $s2 = substr($str, 1, 1);
    if (!(big5_isHB($s1) && big5_isLB($s2))) {
        return false;
    }

    // Two-byte code point: lead byte in the high 8 bits, trail byte in the low 8.
    $code = (ord($s1) << 8) | ord($s2);
    foreach ($strokeMapping as $range) {
        if ($range[1] <= $code && $range[2] >= $code) {
            return $range[0];
        }
    }

    // No range matched; report that explicitly instead of falling off the end.
    return false;
}
/**
 * Get the stroke count of the first character of a UTF-8 string.
 *
 * The string is converted to Big5 with utf8_2_big5() and then looked up with
 * big5_stroke().
 *
 * @param string $str UTF-8 encoded text.
 * @return mixed Whatever big5_stroke() returns for the converted string.
 */
function get_string_stroke($str){
    return big5_stroke(utf8_2_big5($str));
}
/**
 * usort() comparator ordering entries by their 'ord' value, ascending.
 *
 * @param array $a Entry with an 'ord' key.
 * @param array $b Entry with an 'ord' key.
 * @return int 0 when both 'ord' values are equal, -1 when $a sorts first,
 *             1 otherwise.
 */
function ucompare($a, $b){
    $left = $a['ord'];
    $right = $b['ord'];

    if ($left == $right) {
        return 0;
    }
    if ($left < $right) {
        return -1;
    }
    return 1;
}
/**
 * Build sort records for a list of strings, ordered by the stroke count of
 * each string's first character.
 *
 * Every value is passed through urldecode() first. When the first character
 * has a stroke count, its sort key is $stroke_base + strokes; otherwise the
 * key is ord() of the first character's first byte, so such entries sort
 * ahead of every stroke-counted entry.
 *
 * @param array $str_arr  Strings to rank (UTF-8, optionally URL-encoded).
 * @param bool  $dontSort When true, records are returned in input order.
 * @return array List of records with the keys 'firstChar', 'stroke' (-1 when
 *               no stroke count was found), 'ord', 'original_index' (the key
 *               in $str_arr) and 'string' (the decoded value).
 */
function cht_strokesort($str_arr, $dontSort = false){
    $ord_arr = array();
    // Stroke-based keys start at this base so they never collide with the
    // byte values used for non-stroke entries.
    $stroke_base = 50000;

    // foreach always starts at the first element and, unlike each(), still
    // exists in PHP 8.
    foreach ($str_arr as $key => $value) {
        $value = urldecode($value);
        $firstChar = mb_substr($value, 0, 1, 'UTF-8');
        $stroke = get_string_stroke($firstChar);

        if ($stroke > 0) {
            $ord = $stroke_base + (int) $stroke;
        } else {
            $ord = ord($firstChar);
        }

        $ord_arr[] = array(
            'firstChar' => $firstChar,
            'stroke' => $stroke ? $stroke : -1,
            'ord' => $ord,
            'original_index' => $key,
            'string' => $value
        );
    }

    // Sort unless the caller asked for input order.
    if (!$dontSort) {
        usort($ord_arr, "ucompare");
    }
    return $ord_arr;
}
/*
$string = array('驢子','史瑞克','白雪公主','長靴貓','小木偶','薑餅人','三隻小豬','睡美人','壞皇后','七個小矮人','小美人魚','神仙教母', '龜', '台', '灣', '1', '2', '3', 'a', 'b', 'c', '一', '二', '三');
$sorted = cht_strokesort($string);
foreach($sorted as $k => $v){
$index = $v['original_index'];
echo $string[$index].",";
}
*/
//http API: http://localhost/CHTSort/cht_strokesort.php?compare=%E9%A9%A2%E5%AD%90,%E5%8F%B2%E7%91%9E%E5%85%8B,%E7%99%BD%E9%9B%AA%E5%85%AC%E4%B8%BB,%E9%95%B7%E9%9D%B4%E8%B2%93,%E5%B0%8F%E6%9C%A8%E5%81%B6,%E8%96%91%E9%A4%85%E4%BA%BA,%E4%B8%80,%E4%BA%8C,%E4%B8%89,a,b,c,1,2,5,%E4%B8%83&callback=test&dont_sort=true
//
// HTTP entry point: ?compare=a,b,c[&dont_sort=1][&callback=fn]
// Emits the ranking as JSON, or as JSONP when a valid callback name is given.
if (isset($_GET['compare']) && is_string($_GET['compare'])) {
    // explode() replaces split(), which was removed in PHP 7.
    $arr = explode(',', $_GET['compare']);
    $dontSort = !empty($_GET['dont_sort']);
    $sorted = cht_strokesort($arr, $dontSort);

    // The caller already has the input strings; leave them out to keep the
    // response small.
    foreach ($sorted as $k => $v) {
        unset($sorted[$k]['string']);
    }

    $callback = (isset($_GET['callback']) && is_string($_GET['callback'])) ? $_GET['callback'] : '';
    // The callback is echoed into executable script, so only a plain
    // (optionally dotted) JavaScript identifier is accepted; anything else is
    // ignored and plain JSON is returned.
    if ($callback && !preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(\.[A-Za-z_$][A-Za-z0-9_$]*)*$/', $callback)) {
        $callback = '';
    }

    if (!headers_sent()) {
        header('Content-Type: ' . ($callback ? 'application/javascript' : 'application/json') . '; charset=utf-8');
    }

    if ($callback) {
        echo $callback . "(";
    }
    echo json_encode($sorted);
    if ($callback) {
        echo ")";
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment