Skip to content

Instantly share code, notes, and snippets.

@masazdream
Created May 21, 2013 11:15
Show Gist options
  • Save masazdream/5619070 to your computer and use it in GitHub Desktop.
Save masazdream/5619070 to your computer and use it in GitHub Desktop.
/**
 * Computes the cosine similarity between two MeCab analysis results.
 *
 * Each argument is raw MeCab output: one token per line, split here on ","
 * with the first comma field expected to look like "surface\tPOS" and the
 * seventh comma field (index 6) used as the word key (the base form in the
 * standard MeCab/IPA dictionary layout). Parsing stops at the first "EOS" line.
 *
 * Only tokens whose part of speech is 名詞 (noun), 動詞 (verb) or 形容詞
 * (adjective) are counted, and tokens whose key is "*" are skipped. The
 * per-word counts of each text form a term-frequency vector; the result is
 * the inner product of the two vectors divided by the product of their
 * Euclidean norms.
 *
 * @param string $mecab_rt1 MeCab output for text 1
 * @param string $mecab_rt2 MeCab output for text 2
 * @return float|int|string Similarity between 0 and 1, or the message
 *                          '有効な単語を含みません' when either text contains
 *                          no countable word.
 */
public static function textmatching($mecab_rt1, $mecab_rt2){
    // Builds a word => occurrence-count map from one MeCab result.
    $count_words = function ($mecab_rt) {
        $target_props = array('名詞', '動詞', '形容詞');
        $counts = array();

        // One token per line.
        foreach (explode("\n", $mecab_rt) as $line) {
            $fields = explode(",", $line);
            $head = $fields[0];

            // MeCab terminates a sentence with an "EOS" line; only the part
            // before the first EOS is analysed.
            if ($head === 'EOS') {
                break;
            }

            // Index 6 only exists when the line has at least 7 comma fields.
            // Shorter lines fall back to a shared placeholder key.
            $value = isset($fields[6]) ? $fields[6] : 'no valid word.';

            // The first comma field is "surface\tPOS"; without a tab the
            // whole field is treated as the part of speech.
            $head_parts = explode("\t", $head);
            $prop = count($head_parts) > 1 ? $head_parts[1] : $head_parts[0];

            if ($value === '*' || !in_array($prop, $target_props, true)) {
                continue;
            }

            $counts[$value] = isset($counts[$value]) ? $counts[$value] + 1 : 1;
        }

        return $counts;
    };

    $word_list = $count_words($mecab_rt1);
    $word_list2 = $count_words($mecab_rt2);

    // Both texts must contribute at least one countable word.
    if (empty($word_list) || empty($word_list2)) {
        $error_message = '有効な単語を含みません';
        return $error_message;
    }

    // Euclidean norm of each term-frequency vector.
    $square_sum1 = 0;
    foreach ($word_list as $count) {
        $square_sum1 += $count * $count;
    }
    $scale1 = sqrt($square_sum1);

    $square_sum2 = 0;
    foreach ($word_list2 as $count) {
        $square_sum2 += $count * $count;
    }
    $scale2 = sqrt($square_sum2);

    // Inner product over the words present in both texts.
    $inner = 0;
    foreach ($word_list as $word => $count) {
        if (array_key_exists($word, $word_list2)) {
            $inner += $count * $word_list2[$word];
        }
    }

    // Normalise; the guard keeps the division safe should a norm ever be zero.
    $similarity = 0;
    $denominator = $scale1 * $scale2;
    if ($denominator != 0) {
        $similarity = $inner / $denominator;
    }
    return $similarity;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment