Skip to content

Instantly share code, notes, and snippets.

@skrashevich
Last active July 24, 2024 05:46
Show Gist options
  • Save skrashevich/8467b3f36f49a04fd9b1197c5ffa3c1f to your computer and use it in GitHub Desktop.
Save skrashevich/8467b3f36f49a04fd9b1197c5ffa3c1f to your computer and use it in GitHub Desktop.
<?php
// Demo: print the phonetic key computed by ru_soundex() for three sample phrases,
// one per line, as an HTML fragment.
foreach (["к скалам бурым", "с каламбуром", "с калом бурым"] as $str) {
    echo "ru_soundex($str) = " . ru_soundex($str) . "<br />\r\n";
}
/**
 * Transliterates lowercase Russian letters to single ASCII characters so the
 * result can be fed to PHP's ASCII-only soundex().
 *
 * Latin letters and every character that is not in the table (uppercase
 * Cyrillic, digits, punctuation, ...) are returned unchanged. Some letters are
 * deliberately mapped to non-letters ('?', '*', '4', '"', '`').
 *
 * @param string $str UTF-8 encoded text.
 * @return string The text with lowercase Cyrillic letters replaced.
 */
function str_to_translit($str)
{
    // The three-argument form of strtr() translates byte by byte, which
    // misaligns and corrupts multi-byte UTF-8 Cyrillic. The array form
    // replaces whole substrings, so each two-byte letter is mapped as a unit.
    static $map = [
        'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e',
        'ё' => '?', 'ж' => '*', 'з' => 'z', 'и' => 'i', 'й' => 'i', 'к' => 'k',
        'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r',
        'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c',
        'ч' => '4', 'ш' => 'w', 'щ' => 'w', 'ъ' => '"', 'ы' => 'y', 'ь' => '`',
        'э' => 'e', 'ю' => 'u', 'я' => 'a',
    ];

    return strtr($str, $map);
}
/**
 * Computes a Soundex-style phonetic key for a Russian phrase.
 *
 * Steps:
 *  1. Upper-case the input and replace the multi-letter combinations and the
 *     hard/soft signs from the table (the hard sign becomes a space, i.e. a
 *     word break).
 *  2. Split into words and replace every vowel with its "stressed", plain or
 *     iotated sound. A vowel is iotated at the start of a word or after a
 *     character from $v_pattern. "Stressed" is a positional guess based on
 *     the occurrence number of that letter and the vowel count of the word.
 *  3. Replace consonants: a consonant with a voiceless-context entry uses that
 *     entry before a voiceless consonant (an empty entry drops the letter),
 *     a doubled voiced consonant collapses to one sound, and a voiced
 *     consonant at the very end of the text is devoiced.
 *  4. Strip delimiters, transliterate with str_to_translit() and run PHP's
 *     soundex(); the leading letter of the code is replaced by the
 *     upper-cased first character of the original input.
 *
 * All string positions are character positions (UTF-8), not byte offsets.
 *
 * @param string $source UTF-8 text.
 * @return string First character of $source (upper-cased) followed by the
 *                Soundex digits; an empty string for empty input.
 */
function ru_soundex($source)
{
    $source = (string) $source;
    if ($source === '') {
        return '';
    }

    $enc = 'UTF-8';
    // Characters after which Е/Ё/Ю/Я are iotated; also used to count vowels.
    $v_pattern = 'АаЕеЁёИиОоУуЭэЮюЯяЬьЫыЪъ';
    // Voiceless consonants that trigger the alternative consonant sound.
    $voiceless = 'КкПпСсТтФфХхЦцЧчШшЩщ';
    // ASCII delimiters. '-' is escaped and '[' is listed explicitly; the
    // pattern is pure ASCII, so it is safe on UTF-8 without the "u" modifier.
    $delimiters = '~[,.\~`1234567890\-=!@#$%^&*()_+|{}\[\];:\'"<>/? ]~';

    $literal = [
        'А' => ['status' => 'гласный', 'sound' => 'а', 'stressed' => 'а'],
        'Е' => ['status' => 'гласный', 'sound' => 'и', 'stressed' => 'э', $v_pattern => 'йэ'],
        'Ё' => ['status' => 'гласный', 'sound' => 'о', 'stressed' => 'о', $v_pattern => 'йо'],
        'И' => ['status' => 'гласный', 'sound' => 'и', 'stressed' => 'и'],
        'О' => ['status' => 'гласный', 'sound' => 'а', 'stressed' => 'о'],
        'У' => ['status' => 'гласный', 'sound' => 'у', 'stressed' => 'у'],
        'Ы' => ['status' => 'гласный', 'sound' => 'ы', 'stressed' => 'ы'],
        'Э' => ['status' => 'гласный', 'sound' => 'э', 'stressed' => 'э'],
        'Ю' => ['status' => 'гласный', 'sound' => 'у', 'stressed' => 'у', $v_pattern => 'йу'],
        'Я' => ['status' => 'гласный', 'sound' => 'а', 'stressed' => 'а', $v_pattern => 'йа'],
        'Б' => ['status' => 'согласный', 'sound' => 'б', $voiceless => 'п'],
        'В' => ['status' => 'согласный', 'sound' => 'в', $voiceless => 'ф'],
        // Lowercase 'г': an uppercase sound is never transliterated and would
        // be silently dropped by soundex().
        'Г' => ['status' => 'согласный', 'sound' => 'г', $voiceless => 'к'],
        'Д' => ['status' => 'согласный', 'sound' => 'д', $voiceless => 'т'],
        'Ж' => ['status' => 'согласный', 'sound' => 'ж', $voiceless => 'ш'],
        'З' => ['status' => 'согласный', 'sound' => 'з', $voiceless => 'с'],
        'Й' => ['status' => 'согласный', 'sound' => 'й'],
        'К' => ['status' => 'согласный', 'sound' => 'к', $voiceless => ''],
        'Л' => ['status' => 'согласный', 'sound' => 'л'],
        'М' => ['status' => 'согласный', 'sound' => 'м'],
        'Н' => ['status' => 'согласный', 'sound' => 'н'],
        'П' => ['status' => 'согласный', 'sound' => 'п', $voiceless => ''],
        'Р' => ['status' => 'согласный', 'sound' => 'р'],
        'С' => ['status' => 'согласный', 'sound' => 'с'],
        'Т' => ['status' => 'согласный', 'sound' => 'т', $voiceless => ''],
        'Ф' => ['status' => 'согласный', 'sound' => 'ф', $voiceless => ''],
        'Х' => ['status' => 'согласный', 'sound' => 'х'],
        'Ц' => ['status' => 'согласный', 'sound' => 'ц'],
        'Ч' => ['status' => 'согласный', 'sound' => 'чь'],
        'Ш' => ['status' => 'согласный', 'sound' => 'ш'],
        'Щ' => ['status' => 'согласный', 'sound' => 'щь'],
        'Ъ' => ['status' => 'знак', 'sound' => ' '],
        'Ь' => ['status' => 'знак', 'sound' => 'ь'],
        'ТС' => ['status' => 'сочетание', 'sound' => 'ц'],
        'ТЬС' => ['status' => 'сочетание', 'sound' => 'ц'],
        'ШЬ' => ['status' => 'сочетание', 'sound' => 'ш'],
        'СОЛНЦ' => ['status' => 'сочетание', 'sound' => 'сонц'],
        'ЯИЧНИЦ' => ['status' => 'сочетание', 'sound' => 'еишниц'],
        'КОНЕЧНО' => ['status' => 'сочетание', 'sound' => 'канешно'],
        'ЧТО' => ['status' => 'сочетание', 'sound' => 'што'],
        'ЗАЯ' => ['status' => 'сочетание', 'sound' => 'зайэ'],
    ];

    // Table rows of one status, in table order, keyed by the letter(s).
    $byStatus = static fn (string $status): array => array_filter(
        $literal,
        static fn (array $item): bool => $item['status'] === $status
    );

    // Replaces $len characters (not bytes) of $text at character offset $pos.
    $splice = static fn (string $text, int $pos, int $len, string $with): string =>
        mb_substr($text, 0, $pos, 'UTF-8') . $with . mb_substr($text, $pos + $len, null, 'UTF-8');

    // 1. Combinations first, then the signs. str_replace() is byte-wise but
    //    safe here because it only ever matches whole UTF-8 sequences.
    $sound = mb_strtoupper($source, $enc);
    foreach (['сочетание', 'знак'] as $status) {
        foreach ($byStatus($status) as $sign => $translate) {
            $sound = str_replace((string) $sign, $translate['sound'], $sound);
        }
    }

    // 2. Vowels, word by word.
    $words = preg_split($delimiters, $sound, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    $vowels = $byStatus('гласный');
    $vowelRegex = '~[' . $v_pattern . ']~u';
    foreach ($words as $index => $word) {
        // Every replacement contains exactly one character of $v_pattern, so
        // the count stays valid while the word is being rewritten.
        $vowel = (int) preg_match_all($vowelRegex, $word);
        foreach ($vowels as $sign => $translate) {
            $sign = (string) $sign;
            $cur_vowel = 0; // occurrence number of this particular letter
            $pos = 0;
            while (($pos = mb_strpos($word, $sign, $pos, $enc)) !== false) {
                $cur_vowel++;
                $afterVowelOrStart = $pos === 0
                    || mb_strpos($v_pattern, mb_substr($word, $pos - 1, 1, $enc), 0, $enc) !== false;

                if (isset($translate[$v_pattern]) && $afterVowelOrStart) {
                    $replacement = $translate[$v_pattern];
                } elseif (
                    $vowel === 1
                    || ($vowel === 2 && $cur_vowel === 1)
                    || ($vowel >= 3 && $cur_vowel === $vowel - 2)
                ) {
                    $replacement = $translate['stressed'];
                } else {
                    $replacement = $translate['sound'];
                }

                $word = $splice($word, $pos, 1, $replacement);
                $pos += mb_strlen($replacement, $enc);
            }
        }
        $words[$index] = $word;
    }
    $sound = implode(' ', $words);

    // 3. Consonants, one letter of the table at a time so that later letters
    //    see the sounds already produced by earlier ones.
    foreach ($byStatus('согласный') as $sign => $translate) {
        $sign = (string) $sign;
        $pos = 0;
        while (($pos = mb_strpos($sound, $sign, $pos, $enc)) !== false) {
            $next = mb_substr($sound, $pos + 1, 1, $enc); // '' at end of text
            $replacement = $translate['sound'];
            $consumed = 1;

            if (isset($translate[$voiceless])) {
                $alternative = $translate[$voiceless];
                if ($next !== '' && mb_strpos($voiceless, $next, 0, $enc) !== false) {
                    // Before a voiceless consonant.
                    $replacement = $alternative;
                } elseif ($next === '' && $alternative !== '') {
                    // Final devoicing. Skipped for the empty entries, which
                    // would otherwise delete the last consonant of the text.
                    $replacement = $alternative;
                } elseif ($next === $sign) {
                    // Doubled consonant collapses into a single sound.
                    $consumed = 2;
                }
            }

            $sound = $splice($sound, $pos, $consumed, $replacement);
            // Continue right after the inserted text so that nothing is
            // skipped when the replacement is empty.
            $pos += mb_strlen($replacement, $enc);
        }
    }

    // 4. Transliterate and encode.
    $sound = (string) preg_replace($delimiters, '', $sound);
    $code = (string) soundex(str_to_translit($sound));

    return mb_strtoupper(mb_substr($source, 0, 1, $enc), $enc) . mb_substr($code, 1, null, $enc);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment