Skip to content

Instantly share code, notes, and snippets.

@MakStashkevich
Created August 7, 2020 16:39
Show Gist options
  • Save MakStashkevich/e0c4c7860771bbbcbe1f353ca0c70199 to your computer and use it in GitHub Desktop.
Save MakStashkevich/e0c4c7860771bbbcbe1f353ca0c70199 to your computer and use it in GitHub Desktop.
Bad code parse letters)))
<?php
/**
 * Normalises "fancy" Unicode look-alike characters to plain letters and digits.
 *
 * Every LETTER_* / LETTER_RU_* / NUMBER_* constant is a "|"-separated list of
 * look-alike symbols. The LETTERS, LETTERS_RU and NUMBERS tables map each list
 * to the plain character that replaces any symbol from that list.
 *
 * Precedence: when the same symbol appears in more than one list, the list that
 * is declared first in its table wins, and LETTERS wins over LETTERS_RU (for
 * example "ω" is listed in both LETTER_W and LETTER_RU_SH and is mapped to "w").
 */
final class StrParser
{
    public const LETTER_A = 'A|Ⓐ|ⓐ|a|á|À|Â|à|Â|â|Ä|ä|Ã|ã|Å|å|α|Δ|Λ|λ|Ꭿ|∀|₳|Ǻ|ǻ|α|ά|Ǡ|Ắ|ắ|Ằ|ằ|ẳ|Ẵ|ẵ|Ä|ª|ä|Å|À|Á|Â|å|ã|â|à|á|Ã|ᗩ|@|Ⱥ|Ǟ';
    public const LETTER_B = 'B|Ⓑ|ⓑ|b|ß|Β|β|ℬ|Ᏸ|β|฿|ß|Ђ|ᗷ|ᗽ|ᗾ|ᗿ|Ɓ|ƀ|ხ|␢|Ᏸ|ᗸ|ᗹ|ᛔ';
    public const LETTER_C = 'C|Ⓒ|ⓒ|c|Ç|ç|¢|©|☾|ℭ|ℂ|Ç|¢|ç|Č|ċ|Ċ|ĉ|ς|Ĉ|ć|Ć|č|Ḉ|ḉ|⊂|Ꮸ|₡|¢|Ⴚ';
    // Plain uppercase "D" added: every other Latin list starts with its uppercase letter.
    public const LETTER_D = 'D|d|Ⓓ|ⓓ|Þ|þ|Ð|ð|∂|ᗫ|Ɗ|Ď|ď|Đ|đ|ð|∂|₫|ȡ|ᚦ|ᚧ';
    // "ع" and "Є" were fused into one alternative (missing "|"), so neither matched alone.
    public const LETTER_E = 'E|Ⓔ|ⓔ|e|ℰ|ℯ|ໂ|६|£|Ē|℮|ē|Ė|ė|Ę|ě|Ě|ę|Έ|ê|ξ|Ê|È|€|É|∑|Ế|Ề|Ể|Ễ|é|è|ع|Є|є|έ|ε|Ҿ|ҿ';
    public const LETTER_F = 'F|Ⓕ|ⓕ|f|ƒ|ℱ|₣|ƒ|∮|Ḟ|ḟ|ჶ|ᶂ|φ|ᚨ|ᚩ|ᚪ|ᚫ';
    public const LETTER_G = 'G|Ⓖ|ⓖ|g|Ꮹ|Ꮆ|ℊ|Ǥ|ǥ|Ĝ|ĝ|Ğ|ğ|Ġ|ġ|Ģ|ģ|פ|ᶃ|₲';
    public const LETTER_H = 'H|Ⓗ|ⓗ|h|ℍ|ℋ|ℎ|ℌ|ℏ|ዙ|Ꮵ|Ĥ|Ħ|ħ|Ή|廾|Ћ|ђ|Ḩ|Һ|ḩ|♄|ਮ';
    public const LETTER_I = 'I|Ⓘ|ⓘ|i|Ì|Í|Î|Ï|ì|í|î|ï|ℐ|ί|ι|Ï|Ί|Î|ì|Ì|í|Í|î|ϊ|ΐ|Ĩ|ĩ|Ī|ī|Ĭ|ĭ|İ|į|Į|Ꭵ';
    public const LETTER_J = 'J|Ⓙ|ⓙ|j|ჟ|Ĵ|ĵ|ᶖ|ɉ|ℑ';
    public const LETTER_K = 'K|Ⓚ|ⓚ|k|Κ|κ|₭|Ꮶ|Ќ|k|ќ|ķ|Ķ|Ҝ|ҝ|ᶄ|Ҡ|ҡ';
    public const LETTER_L = 'L|Ⓛ|ⓛ|l|£|ℒ|ℓ|Ŀ|ŀ|Ĺ|ĺ|Ļ|ļ|λ|₤|Ł|ł|ľ|Ľ|Ḽ|ḽ|ȴ|Ꮭ';
    public const LETTER_M = 'M|Ⓜ|ⓜ|m|ℳ|ʍ|ᶆ|Ḿ|ḿ|ᗰ|ᙢ|爪|₥|ጠ|ᛖ';
    public const LETTER_N = 'N|Ⓝ|ⓝ|n|η|ñ|Ν|Π|ℕ|η|ñ|ח|Ñ|ή|ŋ|Ŋ|Ń|ń|Ņ|ņ|Ň|ň|ʼn|ȵ|ℵ|₦|ห|ກ|⋒|Ӈ|ӈ';
    public const LETTER_O = 'O|Ⓞ|ⓞ|o|Ο|○|ο|Φ|¤|°|ø|ö|ó|ℴ|ტ|٥|Ό|ó|ό|σ|ǿ|Ǿ|Θ|ò|Ó|Ò|Ô|ô|Ö|ö|Õ|õ|ờ|ớ|ọ|Ọ|ợ|Ợ|ø|Ø|Ό|Ở|Ờ|Ớ|Ổ|ổ|Ợ|Ō|ō|Ő|ő';
    public const LETTER_P = 'P|Ⓟ|ⓟ|p|ρ|Ρ|¶|þ|ℙ|℘|ρ|Ꭾ|Ꮅ|尸|Ҏ|ҏ|ᶈ|₱|☧|ᖘ|ק|₽|թ|Ƿ|Ҏ|ҏ';
    public const LETTER_Q = 'Q|Ⓠ|ⓠ|q|ℚ|q|Q|ᶐ|Ǭ|ǭ|ჹ|૧';
    public const LETTER_R = 'R|Ⓡ|ⓡ|r|®|ℝ|ℜ|ℛ|℟|ჩ|ᖇ|ř|Ř|ŗ|Ŗ|ŕ|Ŕ|ᶉ|Ꮢ|尺|ᚱ';
    public const LETTER_S = 'S|Ⓢ|ⓢ|s|Ṧ|ṧ|ȿ|Ś|ś|š|Š|ş|Ş|ŝ|Ŝ|₰|∫|ֆ|Տ|క';
    public const LETTER_T = 'T|Ⓣ|ⓣ|t|Τ|τ|₸|†|T|t|τ|Ţ|ţ|Ť|ť|ŧ|Ŧ|干|Ṫ|ṫ|ナ|Ꮏ|Ꮖ|テ|₮|⍡';
    public const LETTER_U = 'U|Ⓤ|ⓤ|u|υ|µ|∪|ᙀ|Ũ|Ủ|Ừ|Ử|Ữ|Ự|ύ|ϋ|ú|Ú|ΰ|ù|Û|û|Ü|ử|ữ|ự|ü|ừ|Ũ|ũ|Ū|ū|Ŭ|ŭ|ų|Ų|ű|Ű|ů|Ů|น|Ա';
    public const LETTER_V = 'V|Ⓥ|ⓥ|v|ν|✔|✓|∨|√|Ꮙ|Ṽ|ṽ|ᶌ|℣';
    public const LETTER_W = 'W|Ⓦ|ⓦ|w|ω|ψ|Ψ|₩|ẃ|Ẃ|ẁ|Ẁ|ẅ|ώ|ω|ŵ|Ŵ|Ꮤ|Ꮃ|ฬ|ᗯ|ᙡ|Ẅ|ѡ|ಎ|ಭ|Ꮚ|Ꮗ|ผ|ฝ|พ|ฟ';
    public const LETTER_X = 'X|Ⓧ|ⓧ|x|Χ|χ|×|χ|×|✗|✘|᙭|ჯ|Ẍ|ẍ|ᶍ|א';
    public const LETTER_Y = 'Y|Ⓨ|ⓨ|y|¥|γ|ÿ|ý|Ÿ|Ý|ɣ|Ꭹ|Ꮍ|Ẏ|ẏ|ϒ|ɤ|¥|ע|௶|Ⴘ';
    public const LETTER_Z = 'Z|Ⓩ|ⓩ|z|ℤ|乙|Ẑ|ẑ|ɀ|Ꮓ';

    /** Look-alike list => plain lowercase Latin letter. */
    public const LETTERS = [
        self::LETTER_A => 'a',
        self::LETTER_B => 'b',
        self::LETTER_C => 'c',
        self::LETTER_D => 'd',
        self::LETTER_E => 'e',
        self::LETTER_F => 'f',
        self::LETTER_G => 'g',
        self::LETTER_H => 'h',
        self::LETTER_I => 'i',
        self::LETTER_J => 'j',
        self::LETTER_K => 'k',
        self::LETTER_L => 'l',
        self::LETTER_M => 'm',
        self::LETTER_N => 'n',
        self::LETTER_O => 'o',
        self::LETTER_P => 'p',
        self::LETTER_Q => 'q',
        self::LETTER_R => 'r',
        self::LETTER_S => 's',
        self::LETTER_T => 't',
        self::LETTER_U => 'u',
        self::LETTER_V => 'v',
        self::LETTER_W => 'w',
        self::LETTER_X => 'x',
        self::LETTER_Y => 'y',
        self::LETTER_Z => 'z',
    ];

    public const LETTER_RU_B = 'Ҕ|ҕ|Ϭ|ϭ|চ|ঢ়|ƃ|ɓ';
    public const LETTER_RU_G = '୮|┍|ℾ';
    public const LETTER_RU_D = 'ℊ|∂';
    public const LETTER_RU_ZH = 'ᛤ|♅|Җ|җ|Ӝ|ӝ|Ӂ|ӂ';
    public const LETTER_RU_Z = 'Յ|ℨ|ჳ';
    public const LETTER_RU_I = 'น|ự|Ӥ|ӥ|Ũ|ũ|Ū|ū|Ŭ|ŭ|Ù|ú|Ú|ù|Ҋ|ҋ';
    public const LETTER_RU_L = 'ለ|ሉ|ሊ|ሌ|ል|ሎ|Ꮧ|Ꮑ';
    public const LETTER_RU_P = 'Ո|ग|ກ|⋒|Ҧ|ҧ';
    public const LETTER_RU_F = 'Փ|փ|Ⴔ|ቁ|ቂ|ቃ|ቄ|ቅ|ቆ|ቇ|ቈ|ᛄ';
    public const LETTER_RU_C = 'Ա|ų';
    public const LETTER_RU_CH = 'Կ|կ|੫|Ⴁ|Ӵ|ӵ|Ҹ|ҹ';
    public const LETTER_RU_SH = 'ש|ᗯ|ᙡ|ω';
    public const LETTER_RU_SCH = 'պ|ખ';
    public const LETTER_RU_II = 'Ӹ|ӹ';
    public const LETTER_RU_SHIFT = 'Ѣ|ѣ|৮';
    public const LETTER_RU_E = '∋|∌|∍|ヨ|Ӭ|ӭ|℈';
    public const LETTER_RU_U = 'ਠ';

    /**
     * Look-alike list => plain lowercase Cyrillic letter.
     *
     * Symbols that are also present in a LETTER_* list are resolved by LETTERS
     * first and therefore never produce a Cyrillic letter.
     */
    public const LETTERS_RU = [
        self::LETTER_RU_B => 'б',
        self::LETTER_RU_G => 'г',
        self::LETTER_RU_D => 'д',
        self::LETTER_RU_ZH => 'ж',
        self::LETTER_RU_Z => 'з',
        self::LETTER_RU_I => 'и',
        self::LETTER_RU_L => 'л',
        self::LETTER_RU_P => 'п',
        self::LETTER_RU_F => 'ф',
        self::LETTER_RU_C => 'ц',
        self::LETTER_RU_CH => 'ч',
        self::LETTER_RU_SH => 'ш',
        self::LETTER_RU_SCH => 'щ',
        self::LETTER_RU_II => 'ы',
        self::LETTER_RU_SHIFT => 'ь',
        self::LETTER_RU_E => 'э',
        self::LETTER_RU_U => 'ю',
    ];

    public const NUMBER_0 = '0';
    public const NUMBER_1 = '1|①|❶|①|Ⅰ';
    public const NUMBER_2 = '2|②|❷|②|Ⅱ';
    public const NUMBER_3 = '3|③|❸|③|Ⅲ';
    public const NUMBER_4 = '4|④|❹|④|Ⅳ';
    public const NUMBER_5 = '5|⑤|❺|⑤|Ⅴ';
    public const NUMBER_6 = '6|⑥|❻|⑥|Ⅵ';
    public const NUMBER_7 = '7|⑦|❼|⑦|Ⅶ';
    public const NUMBER_8 = '8|⑧|❽|⑧|Ⅷ';
    public const NUMBER_9 = '9|⑨|❾|⑨|Ⅸ';

    /** Look-alike list => digit. Values are ints; parse() casts them to strings. */
    public const NUMBERS = [
        self::NUMBER_0 => 0,
        self::NUMBER_1 => 1,
        self::NUMBER_2 => 2,
        self::NUMBER_3 => 3,
        self::NUMBER_4 => 4,
        self::NUMBER_5 => 5,
        self::NUMBER_6 => 6,
        self::NUMBER_7 => 7,
        self::NUMBER_8 => 8,
        self::NUMBER_9 => 9,
    ];

    /**
     * Lazily built "symbol => replacement" maps, keyed by table name.
     *
     * @var array<string, array<int|string, string>>
     */
    private static $maps = [];

    /**
     * Parse string and return clean text.
     *
     * Every look-alike symbol is replaced by its plain counterpart in a single
     * pass; plain uppercase Latin letters are lowercased because they are part
     * of the LETTER_* lists. Characters not present in any list are untouched.
     *
     * @param string $str          Text to normalise.
     * @param bool   $clearLetters Apply the LETTERS and LETTERS_RU tables.
     * @param bool   $clearNumbers Apply the NUMBERS table.
     *
     * @return string Normalised text.
     */
    public static function parse(string $str, bool $clearLetters = true, bool $clearNumbers = true): string
    {
        // Array union keeps the left-hand entry on key collisions, which gives
        // the precedence LETTERS > LETTERS_RU > NUMBERS.
        $map = [];
        if ($clearLetters) {
            $map += self::lookupMap('LETTERS', self::LETTERS);
            $map += self::lookupMap('LETTERS_RU', self::LETTERS_RU);
        }
        if ($clearNumbers) {
            $map += self::lookupMap('NUMBERS', self::NUMBERS);
        }

        if ($str === '' || $map === []) {
            return $str;
        }

        // strtr() with an array never re-scans replaced text and cannot fail
        // the way preg_replace() can, so the declared return type always holds.
        return strtr($str, $map);
    }

    /**
     * Expand a "list => replacement" table into a flat "symbol => replacement" map.
     *
     * The first occurrence of a symbol wins, so lists declared earlier in the
     * table take precedence over later ones. The result is cached per table name.
     *
     * @param string $name  Cache key for the table.
     * @param array  $table One of LETTERS, LETTERS_RU or NUMBERS.
     *
     * @return array<int|string, string>
     */
    private static function lookupMap(string $name, array $table): array
    {
        if (!isset(self::$maps[$name])) {
            $map = [];
            foreach ($table as $alternatives => $replacement) {
                // Numeric-string keys such as "0" arrive here as ints; cast back.
                foreach (explode('|', (string) $alternatives) as $symbol) {
                    if ($symbol !== '' && !isset($map[$symbol])) {
                        $map[$symbol] = (string) $replacement;
                    }
                }
            }
            self::$maps[$name] = $map;
        }

        return self::$maps[$name];
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment