Last active
December 31, 2015 12:09
-
-
Save latel/7984503 to your computer and use it in GitHub Desktop.
权重计算,用于返回一行文字在字典里哪一行的权重最高
变通一下亦可以用来做关键词匹配,分词等。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
// Name : Weight Calculation
// Description: provide weight calculation
// Date : 2013/12/16 08:51
// Authors : latel <[email protected]>
// +------------------------------------------------------------------------
//
/* Example usage */

// Lines of text to be scored against the table.
$aItems = array(
    'chinaisbig',
    'whichisnot',
    'totalyrightforme',
);

// Lookup table: each line is a list of words separated by ',' or '|'.
$aTable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);

$oWeight = new TTrie;
$oWeight->newItems($aItems);
$oWeight->newTable($aTable);

// newTable() only builds the dictionary and returns nothing, so the result
// has to be fetched from getShow(): for every item, the list of table-line
// keys that collected the most word hits.
$aResult = $oWeight->getShow();
/* Class definition */

/**
 * Byte-wise trie used to find which line of a word table an input string
 * matches best.
 *
 * Each table line is split into words; every word is stored in the trie
 * together with the key of the table line it came from. An input item is then
 * scanned for dictionary words and, for the 'max' rule, the table-line keys
 * with the highest number of word hits are reported.
 *
 * Matching works on single bytes (str_split / string offsets) and accepts a
 * word as soon as the first accepting node is reached, so a word that has
 * another dictionary word as a prefix is never reported.
 */
class TTrie {
    /**
     * Trie nodes, index 0 being the root. A node maps a one-byte string to
     * the index of its child node; the optional 'acc' entry lists the
     * table-line keys of the words ending at that node.
     */
    protected $aDict = array(array());

    /** Items (lines of text) to be scored. */
    protected $aItems = array();

    /** Rule used for the result currently cached in $aShow. */
    protected $sLastRule;

    /** Per item: table-line key => number of word hits (last genShow run). */
    protected $aMatchs = array();

    /** Cached result of the last genShow run. */
    protected $aShow = array();

    /**
     * Drop the cached matches and result so the next getShow() recomputes.
     */
    private function init() {
        $this->aShow = array();
        $this->aMatchs = array();
    }

    /**
     * Replace the items to be scored.
     *
     * @param mixed $mItems a single item or an array of items
     */
    public function newItems($mItems) {
        $this->aItems = is_array($mItems) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Add the words of a lookup table to the dictionary.
     *
     * Both ',' and '|' separate words within a table line. The dictionary is
     * not cleared first: words from earlier calls stay registered.
     *
     * @param array $aTable table-line key => line of separated words
     */
    public function newTable(array $aTable) {
        foreach ($aTable as $iTableKey => $sTableLine) {
            $aWords = explode(',', str_replace('|', ',', $sTableLine));
            foreach ($aWords as $sWord) {
                $this->genDict($sWord, $iTableKey);
            }
        }
        $this->init();
    }

    /**
     * Return the result for the given rule, recomputing only when needed.
     *
     * @param string $sRule selection rule; only 'max' is implemented
     * @return array item key => list of winning table-line keys; empty array
     *               when there are no items or the dictionary holds no words
     */
    public function getShow($sRule = 'max') {
        // The root node always exists, so test whether it has any transition
        // rather than whether the node list itself is empty.
        if (empty($this->aItems) || empty($this->aDict[0])) {
            return array();
        }
        if (empty($this->aShow) || $sRule !== $this->sLastRule) {
            return $this->genShow($sRule);
        }
        return $this->aShow;
    }

    /**
     * Score every item against the dictionary and cache the outcome.
     *
     * @param string $sRule selection rule; for any rule other than 'max' no
     *                      entry is produced for the items
     * @return array item key => list of table-line keys with the most hits
     *               (an empty list when the item matched no word)
     */
    public function genShow($sRule) {
        $aShow = array();
        $aMatchs = array();
        foreach ($this->aItems as $mItemKey => $mItem) {
            $aCounts = array_count_values($this->matchWord($mItem));
            $aMatchs[] = $aCounts;
            if ($sRule === 'max') {
                // max() must not be called on an empty array.
                $aShow[$mItemKey] = empty($aCounts)
                    ? array()
                    : array_keys($aCounts, max($aCounts), true);
            }
        }
        $this->aShow = $aShow;
        $this->aMatchs = $aMatchs;
        // Remember the rule so getShow() can serve the cached result.
        $this->sLastRule = $sRule;
        return $aShow;
    }

    /**
     * Insert one word into the trie and tag its final node with a key.
     *
     * @param mixed $mWord word to insert; empty words are ignored
     * @param mixed $iKey  table-line key recorded at the word's final node
     */
    private function genDict($mWord, $iKey = '') {
        $sWord = (string) $mWord;
        if ($sWord === '') {
            // e.g. produced by 'a,,b'; an empty word can never be matched.
            return;
        }
        // Nodes are appended sequentially, so the next free index is the count.
        $iNextNode = count($this->aDict);
        $iCur = 0;
        foreach (str_split($sWord) as $sChar) {
            if (!isset($this->aDict[$iCur][$sChar])) {
                $this->aDict[$iNextNode] = array();
                $this->aDict[$iCur][$sChar] = $iNextNode;
                $iNextNode++;
            }
            $iCur = $this->aDict[$iCur][$sChar];
        }
        $this->aDict[$iCur]['acc'][] = $iKey;
    }

    /**
     * Scan a line for dictionary words.
     *
     * @param mixed $sLine text to scan
     * @return array table-line keys of every word found, in order of
     *               appearance, one entry per (word, table line) pair
     */
    public function matchWord($sLine) {
        $iCur = 0;       // current trie node
        $iOffset = 0;    // byte currently examined
        $iPosition = 0;  // byte at which the current match attempt started
        // The trailing NUL forces a mismatch at the end of the line, so a
        // partial match that runs into the end is still backtracked.
        $sLine = (string) $sLine . "\0";
        $iLen = strlen($sLine);
        $aReturn = array();
        while ($iOffset < $iLen) {
            $sChar = $sLine[$iOffset];
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                if (isset($this->aDict[$iCur]['acc'])) {
                    foreach ($this->aDict[$iCur]['acc'] as $mKey) {
                        $aReturn[] = $mKey;
                    }
                    // Word complete: the next attempt starts right after it.
                    $iPosition = $iOffset + 1;
                    $iCur = 0;
                }
            } else {
                // Mismatch: retry from the byte after the failed start.
                $iCur = 0;
                $iOffset = $iPosition;
                $iPosition = $iOffset + 1;
            }
            ++$iOffset;
        }
        return $aReturn;
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment