Last active
July 24, 2024 05:46
-
-
Save skrashevich/8467b3f36f49a04fd9b1197c5ffa3c1f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Demo: print the phonetic key of three Russian phrases, one per line of
// HTML output. $str stays a global so it keeps the last phrase afterwards,
// exactly as the unrolled version did.
foreach (["к скалам бурым", "с каламбуром", "с калом бурым"] as $str) {
    print "ru_soundex($str) = " . ru_soundex($str) . "<br />\r\n";
}
/**
 * Transliterates lowercase Russian letters into single ASCII characters.
 *
 * Every lowercase Cyrillic letter is mapped to exactly one ASCII byte so the
 * result can be fed to soundex(). Some letters deliberately map to
 * non-alphabetic placeholders ('?', '*', '4', '"', '`'). Latin letters,
 * uppercase Cyrillic letters and every other character are passed through
 * unchanged.
 *
 * The mapping is applied with the array form of strtr() because the
 * three-string form works byte by byte and corrupts UTF-8 text.
 *
 * @param string $str UTF-8 text.
 * @return string Transliterated text.
 */
function str_to_translit($str)
{
    static $map = null;
    if ($map === null) {
        // 33 Cyrillic letters paired position-by-position with 33 ASCII bytes.
        $map = array_combine(
            mb_str_split('абвгдеёжзийклмнопрстуфхцчшщъыьэюя', 1, 'UTF-8'),
            str_split('abvgde' . '?*ziik' . 'lmnopr' . 'stufhc' . '4ww"y`' . 'eua')
        );
    }

    return strtr((string) $str, $map);
}

/**
 * Builds a soundex-like phonetic key for a Russian word or phrase.
 *
 * Pipeline:
 *  1. Uppercase the input and replace a few fixed letter combinations and the
 *     hard/soft signs.
 *  2. Split into words and rewrite every vowel to an approximate spoken form
 *     (iotated, "stressed" or reduced) using a position-based heuristic.
 *  3. Rewrite consonants over the re-joined phrase: a consonant followed by a
 *     voiceless consonant takes its "devoiced" form (which is empty for
 *     К/П/Т/Ф), doubled consonants that have such a form are collapsed.
 *  4. Remove separators, transliterate with str_to_translit() and run
 *     soundex() on the result.
 *
 * The key is the uppercased first character of the input followed by the
 * three digits produced by soundex().
 *
 * All string handling is UTF-8 aware. Invalid byte sequences are scrubbed
 * first so that the /u regular expressions cannot fail.
 *
 * Unlike the byte-oriented original, '-' and '[' are now treated as word
 * separators (the old character class contained an accidental "0-=" range and
 * a doubled "\]"). A consonant at the very end of the phrase keeps its plain
 * sound: the original end-of-string test could never be true, so no final
 * devoicing is applied here either.
 *
 * @param string $source UTF-8 text.
 * @return string Phonetic key, or '' for empty input.
 */
function ru_soundex($source)
{
    $enc = 'UTF-8';
    $source = mb_scrub((string) $source, $enc);
    if ($source === '') {
        return '';
    }

    // Fixed spellings replaced as a whole, applied in this order.
    $combinations = [
        'ТС' => 'ц',
        'ТЬС' => 'ц',
        'ШЬ' => 'ш',
        'СОЛНЦ' => 'сонц',
        'ЯИЧНИЦ' => 'еишниц',
        'КОНЕЧНО' => 'канешно',
        'ЧТО' => 'што',
        'ЗАЯ' => 'зайэ',
    ];

    // Hard sign splits a word, soft sign is kept as a lowercase marker.
    $signs = [
        'Ъ' => ' ',
        'Ь' => 'ь',
    ];

    // 'sound' = reduced form, 'stressed' = form picked by the stress
    // heuristic, 'iotated' = form at word start or after a vowel/sign.
    $vowels = [
        'А' => ['sound' => 'а', 'stressed' => 'а'],
        'Е' => ['sound' => 'и', 'stressed' => 'э', 'iotated' => 'йэ'],
        'Ё' => ['sound' => 'о', 'stressed' => 'о', 'iotated' => 'йо'],
        'И' => ['sound' => 'и', 'stressed' => 'и'],
        'О' => ['sound' => 'а', 'stressed' => 'о'],
        'У' => ['sound' => 'у', 'stressed' => 'у'],
        'Ы' => ['sound' => 'ы', 'stressed' => 'ы'],
        'Э' => ['sound' => 'э', 'stressed' => 'э'],
        'Ю' => ['sound' => 'у', 'stressed' => 'у', 'iotated' => 'йу'],
        'Я' => ['sound' => 'а', 'stressed' => 'а', 'iotated' => 'йа'],
    ];

    // 'devoiced' = form used when a voiceless consonant follows; only
    // consonants that define it also get doubled letters collapsed.
    // 'Г' now yields lowercase 'г' so that str_to_translit() can map it.
    $consonants = [
        'Б' => ['sound' => 'б', 'devoiced' => 'п'],
        'В' => ['sound' => 'в', 'devoiced' => 'ф'],
        'Г' => ['sound' => 'г', 'devoiced' => 'к'],
        'Д' => ['sound' => 'д', 'devoiced' => 'т'],
        'Ж' => ['sound' => 'ж', 'devoiced' => 'ш'],
        'З' => ['sound' => 'з', 'devoiced' => 'с'],
        'Й' => ['sound' => 'й'],
        'К' => ['sound' => 'к', 'devoiced' => ''],
        'Л' => ['sound' => 'л'],
        'М' => ['sound' => 'м'],
        'Н' => ['sound' => 'н'],
        'П' => ['sound' => 'п', 'devoiced' => ''],
        'Р' => ['sound' => 'р'],
        'С' => ['sound' => 'с'],
        'Т' => ['sound' => 'т', 'devoiced' => ''],
        'Ф' => ['sound' => 'ф', 'devoiced' => ''],
        'Х' => ['sound' => 'х'],
        'Ц' => ['sound' => 'ц'],
        'Ч' => ['sound' => 'чь'],
        'Ш' => ['sound' => 'ш'],
        'Щ' => ['sound' => 'щь'],
    ];

    $vowelContext = 'АаЕеЁёИиОоУуЭэЮюЯяЬьЫыЪъ';
    $voiceless = 'КкПпСсТтФфХхЦцЧчШшЩщ';
    $vowelRegex = '~[' . $vowelContext . ']~u';
    $separators = '~[,.\~`0-9\-=!@#$%^&*()_+|{}\[\];:\'"<>/? ]~u';

    // Character-based equivalent of substr_replace().
    $splice = static fn (string $text, string $replacement, int $start, int $length): string
        => mb_substr($text, 0, $start, $enc) . $replacement . mb_substr($text, $start + $length, null, $enc);

    // Replacements are lowercase while lookups are uppercase, so a letter
    // that has been rewritten is never matched again by a later step.
    $sound = mb_strtoupper($source, $enc);
    $sound = str_replace(array_keys($combinations), array_values($combinations), $sound);
    $sound = str_replace(array_keys($signs), array_values($signs), $sound);

    $words = preg_split($separators, $sound, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        $words = [];
    }

    $spoken = [];
    foreach ($words as $word) {
        foreach ($vowels as $sign => $rule) {
            // Vowel-like characters currently in the word (both cases, signs included).
            $total = preg_match_all($vowelRegex, $word);
            $seen = 0; // occurrences of this particular letter so far
            $pos = 0;
            while (($pos = mb_strpos($word, $sign, $pos, $enc)) !== false) {
                $seen++;
                $previous = $pos > 0 ? mb_substr($word, $pos - 1, 1, $enc) : null;
                $afterVowel = $previous === null
                    || mb_strpos($vowelContext, $previous, 0, $enc) !== false;

                if (isset($rule['iotated']) && $afterVowel) {
                    $replacement = $rule['iotated'];
                } elseif (
                    $total == 1
                    || ($total == 2 && $seen == 1)
                    || ($total >= 3 && $seen == $total - 2)
                ) {
                    $replacement = $rule['stressed'];
                } else {
                    $replacement = $rule['sound'];
                }

                $word = $splice($word, $replacement, $pos, 1);
                $pos += mb_strlen($replacement, $enc);
            }
        }
        $spoken[] = $word;
    }

    $sound = implode(' ', $spoken);

    foreach ($consonants as $sign => $rule) {
        $pos = 0;
        while (($pos = mb_strpos($sound, $sign, $pos, $enc)) !== false) {
            // '' when the consonant is the last character of the phrase.
            $next = mb_substr($sound, $pos + 1, 1, $enc);
            $replacement = $rule['sound'];
            $length = 1;

            if (array_key_exists('devoiced', $rule)) {
                if ($next !== '' && mb_strpos($voiceless, $next, 0, $enc) !== false) {
                    $replacement = $rule['devoiced'];
                } elseif ($next === $sign) {
                    // Doubled consonant: both letters become one sound.
                    $length = 2;
                }
            }

            $sound = $splice($sound, $replacement, $pos, $length);
            // Advance by what was inserted; after an empty replacement the
            // following letter now sits at $pos and must still be examined.
            $pos += mb_strlen($replacement, $enc);
        }
    }

    $sound = preg_replace($separators, '', $sound);

    return mb_strtoupper(mb_substr($source, 0, 1, $enc), $enc)
        . mb_substr(soundex(str_to_translit($sound)), 1, null, $enc);
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment