Last active
July 26, 2022 10:29
-
-
Save kamiaka/1df9a636a449b41b62e78c22f5efda66 to your computer and use it in GitHub Desktop.
Convert Japanese kana from UTF-8-MAC (decomposed) form to precomposed UTF-8, in any environment.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Convert decomposed Japanese kana (UTF-8-MAC) into precomposed UTF-8 kana.
 *
 * A base kana followed by the combining voiced sound mark (U+3099) or the
 * combining semi-voiced sound mark (U+309A) is replaced by the single
 * precomposed character. Any combining mark that is still present afterwards
 * is replaced by its spacing counterpart (U+309B / U+309C).
 *
 * The lookup table below is generated code.
 *
 * @param string $str UTF-8-MAC string
 * @return string UTF-8 string
 */
function normalizeUTF8MacKana($str) {
    // Decomposed byte sequence => precomposed byte sequence.
    // str_replace() applies the pairs in this order, so the two bare
    // combining marks must stay at the very end of the table.
    $table = array(
        // Hiragana + combining voiced sound mark
        "\xe3\x81\x8b\xe3\x82\x99" => "\xe3\x81\x8c",
        "\xe3\x81\x8d\xe3\x82\x99" => "\xe3\x81\x8e",
        "\xe3\x81\x8f\xe3\x82\x99" => "\xe3\x81\x90",
        "\xe3\x81\x91\xe3\x82\x99" => "\xe3\x81\x92",
        "\xe3\x81\x93\xe3\x82\x99" => "\xe3\x81\x94",
        "\xe3\x81\x95\xe3\x82\x99" => "\xe3\x81\x96",
        "\xe3\x81\x97\xe3\x82\x99" => "\xe3\x81\x98",
        "\xe3\x81\x99\xe3\x82\x99" => "\xe3\x81\x9a",
        "\xe3\x81\x9b\xe3\x82\x99" => "\xe3\x81\x9c",
        "\xe3\x81\x9d\xe3\x82\x99" => "\xe3\x81\x9e",
        "\xe3\x81\x9f\xe3\x82\x99" => "\xe3\x81\xa0",
        "\xe3\x81\xa1\xe3\x82\x99" => "\xe3\x81\xa2",
        "\xe3\x81\xa4\xe3\x82\x99" => "\xe3\x81\xa5",
        "\xe3\x81\xa6\xe3\x82\x99" => "\xe3\x81\xa7",
        "\xe3\x81\xa8\xe3\x82\x99" => "\xe3\x81\xa9",
        "\xe3\x81\xaf\xe3\x82\x99" => "\xe3\x81\xb0",
        "\xe3\x81\xb2\xe3\x82\x99" => "\xe3\x81\xb3",
        "\xe3\x81\xb5\xe3\x82\x99" => "\xe3\x81\xb6",
        "\xe3\x81\xb8\xe3\x82\x99" => "\xe3\x81\xb9",
        "\xe3\x81\xbb\xe3\x82\x99" => "\xe3\x81\xbc",
        "\xe3\x81\x86\xe3\x82\x99" => "\xe3\x82\x94",
        "\xe3\x82\x9d\xe3\x82\x99" => "\xe3\x82\x9e",
        // Katakana + combining voiced sound mark
        "\xe3\x82\xab\xe3\x82\x99" => "\xe3\x82\xac",
        "\xe3\x82\xad\xe3\x82\x99" => "\xe3\x82\xae",
        "\xe3\x82\xaf\xe3\x82\x99" => "\xe3\x82\xb0",
        "\xe3\x82\xb1\xe3\x82\x99" => "\xe3\x82\xb2",
        "\xe3\x82\xb3\xe3\x82\x99" => "\xe3\x82\xb4",
        "\xe3\x82\xb5\xe3\x82\x99" => "\xe3\x82\xb6",
        "\xe3\x82\xb7\xe3\x82\x99" => "\xe3\x82\xb8",
        "\xe3\x82\xb9\xe3\x82\x99" => "\xe3\x82\xba",
        "\xe3\x82\xbb\xe3\x82\x99" => "\xe3\x82\xbc",
        "\xe3\x82\xbd\xe3\x82\x99" => "\xe3\x82\xbe",
        "\xe3\x82\xbf\xe3\x82\x99" => "\xe3\x83\x80",
        "\xe3\x83\x81\xe3\x82\x99" => "\xe3\x83\x82",
        "\xe3\x83\x84\xe3\x82\x99" => "\xe3\x83\x85",
        "\xe3\x83\x86\xe3\x82\x99" => "\xe3\x83\x87",
        "\xe3\x83\x88\xe3\x82\x99" => "\xe3\x83\x89",
        "\xe3\x83\x8f\xe3\x82\x99" => "\xe3\x83\x90",
        "\xe3\x83\x92\xe3\x82\x99" => "\xe3\x83\x93",
        "\xe3\x83\x95\xe3\x82\x99" => "\xe3\x83\x96",
        "\xe3\x83\x98\xe3\x82\x99" => "\xe3\x83\x99",
        "\xe3\x83\x9b\xe3\x82\x99" => "\xe3\x83\x9c",
        "\xe3\x82\xa6\xe3\x82\x99" => "\xe3\x83\xb4",
        "\xe3\x83\xaf\xe3\x82\x99" => "\xe3\x83\xb7",
        "\xe3\x83\xb0\xe3\x82\x99" => "\xe3\x83\xb8",
        "\xe3\x83\xb1\xe3\x82\x99" => "\xe3\x83\xb9",
        "\xe3\x83\xb2\xe3\x82\x99" => "\xe3\x83\xba",
        // Hiragana + combining semi-voiced sound mark
        "\xe3\x81\xaf\xe3\x82\x9a" => "\xe3\x81\xb1",
        "\xe3\x81\xb2\xe3\x82\x9a" => "\xe3\x81\xb4",
        "\xe3\x81\xb5\xe3\x82\x9a" => "\xe3\x81\xb7",
        "\xe3\x81\xb8\xe3\x82\x9a" => "\xe3\x81\xba",
        "\xe3\x81\xbb\xe3\x82\x9a" => "\xe3\x81\xbd",
        // Katakana + combining semi-voiced sound mark
        "\xe3\x83\x8f\xe3\x82\x9a" => "\xe3\x83\x91",
        "\xe3\x83\x92\xe3\x82\x9a" => "\xe3\x83\x94",
        "\xe3\x83\x95\xe3\x82\x9a" => "\xe3\x83\x97",
        "\xe3\x83\x98\xe3\x82\x9a" => "\xe3\x83\x9a",
        "\xe3\x83\x9b\xe3\x82\x9a" => "\xe3\x83\x9d",
        // Leftover combining marks => spacing marks
        "\xe3\x82\x99" => "\xe3\x82\x9b",
        "\xe3\x82\x9a" => "\xe3\x82\x9c",
    );

    $decomposed = array_keys($table);
    $precomposed = array_values($table);

    return str_replace($decomposed, $precomposed, $str);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Generate the PHP source code of the normalizeUTF8MacKana() function.
 *
 * For every base kana listed below, the generated search table contains the
 * base character followed by a combining sound mark (U+3099 for the voiced
 * group, U+309A for the semi-voiced group), and the generated replacement
 * table contains the matching precomposed character. All characters are
 * emitted as "\xNN" byte escapes. Two trailing entries map the bare combining
 * marks to the spacing marks U+309B / U+309C.
 *
 * @return string PHP source of the function definition (no opening tag)
 */
function normalizeUTF8MacKanaFuncGen() {
    // Each group: [base kana, precomposed kana, combining mark appended to each base].
    // The base and precomposed strings must line up character by character.
    $groups = [
        [
            "かきくけこさしすせそたちつてとはひふへほうゝカキクケコサシスセソタチツテトハヒフヘホウワヰヱヲ",
            "がぎぐげござじずぜぞだぢづでどばびぶべぼゔゞガギグゲゴザジズゼゾダヂヅデドバビブベボヴヷヸヹヺ",
            "\xe3\x82\x99",
        ],
        [
            "はひふへほハヒフヘホ",
            "ぱぴぷぺぽパピプペポ",
            "\xe3\x82\x9a",
        ],
    ];

    // Render a byte string as a run of "\xNN" escapes. The width is fixed at
    // two hex digits so that a byte below 0x10 still yields a valid escape.
    $toHexEscapes = function ($bytes) {
        $escaped = '';
        $byteCount = strlen($bytes);
        for ($j = 0; $j < $byteCount; $j++) {
            $escaped .= sprintf('\x%02x', ord($bytes[$j]));
        }
        return $escaped;
    };

    $search = [];
    $replace = [];
    foreach ($groups as $group) {
        list($bases, $composed, $mark) = $group;

        // The literals are converted to UTF-8, so every mb_* call below names
        // 'UTF-8' explicitly instead of relying on mb_internal_encoding();
        // otherwise a non-UTF-8 internal encoding would split the kana into
        // single bytes and corrupt the generated table.
        $bases = mb_convert_encoding($bases, 'UTF-8', 'auto');
        $baseCount = mb_strlen($bases, 'UTF-8');
        for ($i = 0; $i < $baseCount; $i++) {
            $search[] = $toHexEscapes(mb_substr($bases, $i, 1, 'UTF-8') . $mark);
        }

        $composed = mb_convert_encoding($composed, 'UTF-8', 'auto');
        $composedCount = mb_strlen($composed, 'UTF-8');
        for ($i = 0; $i < $composedCount; $i++) {
            $replace[] = $toHexEscapes(mb_substr($composed, $i, 1, 'UTF-8'));
        }
    }

    return implode("\n", [
        'function normalizeUTF8MacKana($str) {',
        ' return str_replace(',
        ' array("' . implode('", "', $search ) . '", "\xe3\x82\x99", "\xe3\x82\x9a"),',
        ' array("' . implode('", "', $replace) . '", "\xe3\x82\x9b", "\xe3\x82\x9c"),',
        ' $str',
        ' );',
        '}'
    ]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment