Created
November 15, 2014 01:33
-
-
Save masayuki5160/b7bb4c9a98d8d74d1166 to your computer and use it in GitHub Desktop.
php_mecabのテスト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE HTML>
<html lang="ja">
<head>
<meta charset="UTF-8">
<title>転置インデックス作成</title>
</head>
<body>
<!-- Input page: submits the entered text to indexer.php as the POST field "doc". -->
<form name="input" action="indexer.php" method="post">
<!-- The label is bound to the text box so assistive technology announces the prompt. -->
<label for="doc">転置インデックスを作成するテキストを入力してください:</label>
<input type="text" id="doc" name="doc"><br>
<input type="submit">
</form>
</body>
</html>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE HTML>
<html lang="ja">
<head>
<meta charset="UTF-8">
<title>転置インデックス作成</title>
</head>
<body>
<?php
// Demo page: splits the posted text into morphemes with MeCab, prints them,
// then builds and prints a toy inverted index (morpheme => posting list).

// Read the field defensively: the page may be opened without a POST body,
// or "doc" may arrive as an array (doc[]=...), neither of which is parseable.
$rawInput  = ( isset($_POST['doc']) && is_string($_POST['doc']) ) ? $_POST['doc'] : '';
// Keep the text unescaped for analysis; HTML escaping happens only at output
// time so that characters such as & or < are not tokenised as "&amp;" / "&lt;".
$textInput = rtrim($rawInput);

// Compare against '' rather than empty() so that the input "0" is still processed.
if ( $textInput !== '' ) {
    // Create the tagger only when there is something to analyse.
    $mecab = new MeCab_Tagger();

    // Start of MeCab morphological analysis.
    echo "[形態素に分解した結果]<br/>";

    // Associative array holding the inverted index:
    //   array(
    //       key (index term) => value (posting list of document IDs),
    //       ...
    //   )
    $newInvertedIndex = array();

    // MeCab returns one morpheme per line; split the result on newlines.
    $resultSet = explode("\n", $mecab->parse($textInput));

    foreach ( $resultSet as $eachResult ) {
        // The end-of-sentence marker is a line consisting solely of "EOS".
        // Match the whole line so that a token whose surface merely starts
        // with "EOS" (which is followed by a tab) is not mistaken for it.
        if ( rtrim($eachResult, "\r") === 'EOS' ) {
            break;
        }

        // Each line is "surface<TAB>features"; only the surface form is used.
        // Limiting the split and indexing [0] avoids an undefined-offset
        // notice on a line that contains no tab.
        $fields       = explode("\t", $eachResult, 2);
        $eachMorpheme = $fields[0];
        if ( $eachMorpheme === '' ) {
            // Nothing to index on a blank line.
            continue;
        }

        echo htmlspecialchars($eachMorpheme, ENT_QUOTES, 'UTF-8') . "<br/>";

        // NOTE(review): the document IDs are hard-coded placeholders ("1" on
        // first sight, "2" on every repeat); this looks like test scaffolding
        // and should be replaced by real document IDs - confirm with the author.
        if ( isset($newInvertedIndex[$eachMorpheme]) ) {
            $newInvertedIndex[$eachMorpheme][] = "2";
        } else {
            $newInvertedIndex[$eachMorpheme] = array("1");
        }
    }

    echo "<br/><br/>";

    // Dump the inverted index for inspection.
    echo "作成したInvertedIndexの確認<br/>";
    foreach ( $newInvertedIndex as $dic => $postingList ) {
        // Numeric morphemes become integer keys, hence the explicit string cast.
        echo htmlspecialchars((string) $dic, ENT_QUOTES, 'UTF-8') . " => ";
        foreach ( $postingList as $docId ) {
            echo $docId . " ";
        }
        echo "<br/>";
    }
} else {
    echo "処理を中断します。<br/>";
}
?>
<a href="index.html">TOP</a>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
テスト中なだけ。