Created
March 12, 2017 09:08
-
-
Save sergiks/82b7f822e5ff543aa3d7e687f94756a6 to your computer and use it in GitHub Desktop.
PHP class for conversion between Russian text and numbers. First draft.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Conversion of Russian numerals (number words) into numbers, and of numbers into Russian text.
 *
 * Sergey Sokolov [email protected] Moscow 2017.
 */
/**
 * Converts between Russian number words and integers.
 *
 * NumText::toNumber() parses text such as "пятьдесят тысяч четыреста двадцать два"
 * into 50422; NumText::toText() performs the reverse conversion.
 *
 * In every vocabulary table the first variant of an entry is the canonical
 * spelling used when generating text; any further variants are only accepted
 * when parsing.
 */
class NumText {
    /**
     * Units and "teens": value => [masculine form, optional feminine form].
     * The feminine form is needed before "тысяча" ("одна тысяча", "две тысячи").
     */
    private $numbers = [
        0 => ['ноль'],
        1 => ['один', 'одна'],
        2 => ['два', 'две'],
        3 => ['три'],
        4 => ['четыре'],
        5 => ['пять'],
        6 => ['шесть'],
        7 => ['семь'],
        8 => ['восемь'],
        9 => ['девять'],
        10 => ['десять'], // duplicated in $tens
        11 => ['одиннадцать'],
        12 => ['двенадцать'],
        13 => ['тринадцать'],
        14 => ['четырнадцать'],
        15 => ['пятнадцать'],
        16 => ['шестнадцать'],
        17 => ['семнадцать'],
        18 => ['восемнадцать'],
        19 => ['девятнадцать'],
    ];

    /**
     * Round tens. The second variants of 70 and 80 are common misspellings
     * that earlier revisions of this class used; they remain accepted on input.
     */
    private $tens = [
        10 => ['десять'],
        20 => ['двадцать'],
        30 => ['тридцать'],
        40 => ['сорок'],
        50 => ['пятьдесят'],
        60 => ['шестьдесят'],
        70 => ['семьдесят', 'семдесят'],
        80 => ['восемьдесят', 'восемдесят'],
        90 => ['девяносто'],
    ];

    /** Round hundreds. */
    private $hundreds = [
        100 => ['сто'],
        200 => ['двести'],
        300 => ['триста'],
        400 => ['четыреста'],
        500 => ['пятьсот'],
        600 => ['шестьсот'],
        700 => ['семьсот'],
        800 => ['восемьсот'],
        900 => ['девятьсот'],
    ];

    /**
     * Power-of-ten names: exponent => [form for 1, form for 2..4, form for 5+, abbreviation].
     */
    private $powers = [
        3 => ['тысяча', 'тысячи', 'тысяч', 'тыс'],
        6 => ['миллион', 'миллиона', 'миллионов', 'млн'],
        9 => ['миллиард', 'миллиарда', 'миллиардов', 'млрд'],
    ];

    /**
     * Parses Russian number words into an integer.
     *
     * Words are separated by any amount of whitespace. A leading "минус"
     * negates the result. A word that is not in the vocabulary is reported
     * on standard output ("Not found: ...") and contributes nothing.
     *
     * @param string $text e.g. "пятьдесят тысяч четыреста двадцать два"
     * @return int|float the parsed value (float only if it exceeds the integer range)
     */
    public static function toNumber($text) {
        $np = self::instance();

        $text = (string) $text;
        // The vocabulary is lower-case; normalise the input when mbstring is available.
        if (function_exists('mb_strtolower')) {
            $text = mb_strtolower($text, 'UTF-8');
        }

        // "u" makes the pattern UTF-8 aware so Cyrillic is never split byte-wise;
        // "+" with PREG_SPLIT_NO_EMPTY drops the empty words that repeated or
        // leading/trailing whitespace would otherwise produce.
        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            // Input is not valid UTF-8: fall back to splitting on spaces and tabs only.
            $words = preg_split('/[ \t\r\n]+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        $sign = 1;
        if (count($words) > 0 && $words[0] === 'минус') {
            $sign = -1;
            array_shift($words);
        }

        $n = 0;
        foreach ($np->splitTextToGroups($words) as $exp => $group) {
            $n += pow(10, $exp) * $np->textGroupToNumber($group);
        }
        return $sign < 0 ? -$n : $n;
    }

    /**
     * Spells an integer out in Russian words.
     *
     * @param int|float|string $number an integer value (integral floats and
     *                                 numeric strings are accepted)
     * @return string e.g. 50422 => "пятьдесят тысяч четыреста двадцать два"
     * @throws InvalidArgumentException if $number is not an integer value
     * @throws OutOfRangeException if $number needs a power name that is not in the vocabulary
     */
    public static function toText($number) {
        $np = self::instance();

        if (!is_int($number)) {
            if (!is_numeric($number) || floor((float) $number) != (float) $number) {
                throw new InvalidArgumentException('NumText::toText() expects an integer value.');
            }
            $number = (float) $number;
        }

        // The largest named power is 10^max; values of 10^(max+3) and above cannot be spelled.
        $limit = pow(10, max(array_keys($np->powers)) + 3);
        if (abs($number) >= $limit || abs($number) > PHP_INT_MAX) {
            throw new OutOfRangeException('Number is too large to be converted to text.');
        }
        $number = (int) $number;

        if ($number === 0) {
            return $np->numbers[0][0];
        }

        $words = [];
        if ($number < 0) {
            $words[] = 'минус';
            $number = -$number;
        }

        foreach ($np->splitNumberToGroups($number) as $exp => $value) {
            if ($value === 0) {
                continue; // an all-zero group is simply not pronounced
            }
            // "тысяча" is feminine, so 1 and 2 before it become "одна" / "две".
            $words[] = $np->groupToText($value, $exp === 3);
            if ($exp > 0) {
                $words[] = $np->powerForm($exp, $value);
            }
        }
        return implode(' ', $words);
    }

    /**
     * Returns the shared instance that holds the vocabulary tables.
     *
     * @return NumText
     */
    private static function instance() {
        static $instance = null;
        if ($instance === null) {
            $instance = new self();
        }
        return $instance;
    }

    /**
     * Splits a non-negative integer into three-digit groups keyed by their
     * power-of-ten exponent, most significant group first.
     * 12345 => [3 => 12, 0 => 345]
     *
     * @param int $number non-negative integer
     * @return array exponent => group value (0..999)
     */
    private function splitNumberToGroups($number) {
        $groups = [];
        $exp = 0;
        do {
            $rest = $number % 1000;
            $groups[$exp] = $rest;
            $number = (int) (($number - $rest) / 1000);
            $exp += 3;
        } while ($number > 0);
        krsort($groups);
        return $groups;
    }

    /**
     * Splits words into three-digit groups at the power words ("тысяч*", "миллион*", ...).
     * Returns an associative array of the form exponent => words:
     * "пятьдесят шесть тысяч триста двадцать семь" => [3 => ["пятьдесят", "шесть"], 0 => ["триста", "двадцать", "семь"]]
     *
     * A power word with nothing in front of it ("тысяча двести") stands for
     * exactly one of that power, so its group becomes ["один"].
     *
     * @param array $words
     * @return array exponent => list of words
     */
    private function splitTextToGroups($words) {
        $group = [];
        $result = [];
        foreach ($words as $word) {
            $exponent = false;
            foreach ($this->powers as $exp => $forms) {
                if (in_array($word, $forms, true)) {
                    $exponent = $exp;
                    break;
                }
            }
            if ($exponent === false) {
                $group[] = $word;
                continue;
            }
            $result[$exponent] = count($group) > 0 ? $group : [$this->numbers[1][0]];
            $group = [];
        }
        if (count($group) > 0) {
            $result[0] = $group;
        }
        return $result;
    }

    /**
     * Turns a group of up to three words into a number 0..999 by summing the
     * value of every recognised word. Unknown words are reported on standard
     * output and skipped.
     *
     * @param array $group words of one group
     * @return int
     */
    private function textGroupToNumber($group) {
        $total = 0;
        foreach ($group as $word) {
            $value = false;
            foreach ([$this->hundreds, $this->tens, $this->numbers] as $table) {
                $value = $this->keyByValue($table, $word);
                if ($value !== false) {
                    break;
                }
            }
            if ($value === false) {
                // not found
                echo "Not found: " . $word . PHP_EOL;
                continue;
            }
            $total += $value;
        }
        return $total;
    }

    /**
     * Spells a number 1..999 in words.
     * 369 => "триста шестьдесят девять"
     *
     * @param int  $number   value in the range 0..999 (0 yields an empty string)
     * @param bool $feminine use the feminine forms of 1 and 2 ("одна", "две")
     * @return string
     */
    private function groupToText($number, $feminine = false) {
        $words = [];

        $hundreds = $number - $number % 100;
        if ($hundreds > 0) {
            $words[] = $this->hundreds[$hundreds][0];
        }

        $rest = $number % 100;
        if ($rest >= 20) {
            $tens = $rest - $rest % 10;
            $words[] = $this->tens[$tens][0];
            $rest = $rest % 10;
        }
        // Here $rest is 0..19; 10..19 are single words in $numbers.
        if ($rest > 0) {
            $forms = $this->numbers[$rest];
            $words[] = ($feminine && isset($forms[1])) ? $forms[1] : $forms[0];
        }
        return implode(' ', $words);
    }

    /**
     * Picks the grammatical form of a power word that agrees with the count
     * in front of it: 1 "тысяча", 2..4 "тысячи", otherwise "тысяч"; counts
     * ending in 11..14 always take the last form.
     *
     * @param int $exp   key of $powers
     * @param int $count value of the group in front of the power word (1..999)
     * @return string
     */
    private function powerForm($exp, $count) {
        $forms = $this->powers[$exp];
        $lastTwo = $count % 100;
        $last = $count % 10;
        if ($lastTwo >= 11 && $lastTwo <= 14) {
            return $forms[2];
        }
        if ($last === 1) {
            return $forms[0];
        }
        if ($last >= 2 && $last <= 4) {
            return $forms[1];
        }
        return $forms[2];
    }

    /**
     * Finds the key under which $value is stored in $array. Entries may be
     * plain values or lists of variants.
     *
     * @param array $array
     * @param mixed $value
     * @return int|string|false the matching key, or false when nothing matches
     */
    private function keyByValue($array, $value) {
        foreach ($array as $key => $entry) {
            if (is_array($entry)) {
                if (in_array($value, $entry, true)) {
                    return $key;
                }
            } elseif ($value === $entry) {
                return $key;
            }
        }
        return false;
    }
}
// Demo: print the sample conversion only when this file is the script being
// executed, so that including the file for its class produces no output.
if (isset($_SERVER['SCRIPT_FILENAME']) && realpath($_SERVER['SCRIPT_FILENAME']) === __FILE__) {
    echo NumText::toNumber("пятьдесят тысяч четыреста двадцать два") . PHP_EOL;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment