Last active
July 28, 2023 01:55
-
-
Save ten9miq/ba6eb7be4d4edf5f08b70d073f680717 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Builds a UTF-8 string containing the characters of an inclusive code point range.
 *
 * Surrogate code points (U+D800..U+DFFF) have no UTF-8 encoding and are skipped,
 * as is any other code point for which mb_chr() reports failure.
 *
 * @param int $startCodePoint First code point of the range (inclusive).
 * @param int $endCodePoint   Last code point of the range (inclusive).
 * @return string The encodable characters in ascending code point order;
 *                an empty string when the range is empty.
 */
function generateUTF8CharactersInRange($startCodePoint, $endCodePoint) {
    $utf8Characters = '';
    for ($codePoint = $startCodePoint; $codePoint <= $endCodePoint; $codePoint++) {
        // Surrogates are not Unicode scalar values; skip them explicitly so the
        // result does not depend on how mb_chr() treats them.
        if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
            continue;
        }
        // mb_chr() is used instead of decoding '&#N;' through the 'HTML-ENTITIES'
        // pseudo-encoding, which is deprecated as of PHP 8.2.
        $character = mb_chr($codePoint, 'UTF-8');
        if ($character !== false) {
            $utf8Characters .= $character;
        }
    }
    return $utf8Characters;
}
// Build a single string covering the code point range U+0000 through U+10FFFF.
$utf8String = generateUTF8CharactersInRange(
    0x0000,   // first code point of the range
    0x10FFFF  // last code point of the range
);
/**
 * Finds the characters of a UTF-8 string that do not convert to SJIS.
 *
 * Each character is converted on its own with mb_convert_encoding(); a result
 * of '?' is treated as "could not be converted". A literal '?' in the input is
 * skipped so that it is not mistaken for a failed conversion.
 *
 * NOTE(review): this assumes mbstring's substitute character is '?' — confirm
 * mb_substitute_character() has not been changed by the caller or php.ini.
 *
 * Requires PHP 7.4+ (mb_str_split).
 *
 * @param string $utf8String UTF-8 encoded text to inspect.
 * @return string[] The characters that failed to convert, in input order
 *                  (duplicates are kept).
 */
function findUnconvertibleChars($utf8String) {
    $unconvertibleChars = [];
    // Split once up front rather than calling mb_substr($s, $i, 1) per index,
    // which has to locate the i-th multibyte character on every call.
    foreach (mb_str_split($utf8String, 1, 'UTF-8') as $char) {
        // A real question mark converts to '?' legitimately; do not report it.
        if ($char === '?') {
            continue;
        }
        $sjisChar = mb_convert_encoding($char, 'SJIS', 'UTF-8');
        if ($sjisChar === '?') {
            $unconvertibleChars[] = $char;
        }
    }
    return $unconvertibleChars;
}
// Collect the characters of $utf8String that findUnconvertibleChars() reports.
$unconvertibleChars = findUnconvertibleChars($utf8String);

// Print the header line, then each reported character on a line of its own.
print "変換できない文字の一覧:\n";
foreach ($unconvertibleChars as $char) {
    print "{$char}\n";
}
############################################################ | |
// This file already declares generateUTF8CharactersInRange() earlier; declaring it
// a second time unconditionally is a fatal "Cannot redeclare" error, so this copy
// is only defined when no definition exists yet.
if (!function_exists('generateUTF8CharactersInRange')) {
    /**
     * Builds a UTF-8 string containing the characters of an inclusive code point range.
     *
     * Surrogate code points (U+D800..U+DFFF) have no UTF-8 encoding and are skipped,
     * as is any other code point for which mb_chr() reports failure.
     *
     * @param int $startCodePoint First code point of the range (inclusive).
     * @param int $endCodePoint   Last code point of the range (inclusive).
     * @return string The encodable characters in ascending code point order;
     *                an empty string when the range is empty.
     */
    function generateUTF8CharactersInRange($startCodePoint, $endCodePoint) {
        $utf8Characters = '';
        for ($codePoint = $startCodePoint; $codePoint <= $endCodePoint; $codePoint++) {
            // Surrogates are not Unicode scalar values; skip them explicitly so the
            // result does not depend on how mb_chr() treats them.
            if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
                continue;
            }
            // mb_chr() is used instead of decoding '&#N;' through the 'HTML-ENTITIES'
            // pseudo-encoding, which is deprecated as of PHP 8.2.
            $character = mb_chr($codePoint, 'UTF-8');
            if ($character !== false) {
                $utf8Characters .= $character;
            }
        }
        return $utf8Characters;
    }
}
// Walk the code point space U+0000..U+10FFFF one chunk at a time; each pass
// advances the start to the code point just after the previous chunk's end.
for ($startCodePoint = 0x0000; $startCodePoint <= 0x10FFFF; $startCodePoint = $endCodePoint + 1) {
    // A chunk spans at most 0x10000 code points and never runs past U+10FFFF.
    $endCodePoint = min($startCodePoint + 0xFFFF, 0x10FFFF);

    // Generate this chunk's characters.
    $utf8String = generateUTF8CharactersInRange($startCodePoint, $endCodePoint);

    // Keep the characters that findUnconvertibleChars() reports for this chunk.
    $unconvertibleChars = findUnconvertibleChars($utf8String);

    // Print the header for this chunk, then one reported character per line.
    echo "変換できない文字の一覧:\n";
    foreach ($unconvertibleChars as $char) {
        echo $char, "\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment