-
-
Save b14r/b3425cdf048069069624b61fa640cc9b to your computer and use it in GitHub Desktop.
[PHP] gooラボ( https://labs.goo.ne.jp/ )の文字列解析系API用のクラス(やっつけ)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
本番利用にはクレジット表記が必要なので注意
https://labs.goo.ne.jp/apiusage/
*/
/**
 * Small client for the text-analysis endpoints of goo Lab (https://labs.goo.ne.jp/).
 *
 * Return convention shared by all public methods:
 *  - false : the arguments were rejected before any request was made;
 *  - null  : the request failed or the response did not contain the expected field;
 *  - otherwise the extracted part of the decoded JSON response.
 */
class gooAPI
{
    /** @var string Application ID sent as `app_id` with every request. */
    private $app_id = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx';

    /** @var string Base URL the endpoint path is appended to (always ends with "/"). */
    private $api_base = 'https://labs.goo.ne.jp/api/';

    /**
     * Both arguments are optional so that `new gooAPI()` keeps using the built-in values.
     *
     * @param string|null $app_id   Application ID to use instead of the built-in placeholder.
     * @param string|null $api_base Base URL to use instead of the built-in one.
     */
    public function __construct($app_id = null, $api_base = null)
    {
        if (is_string($app_id) && '' !== $app_id) {
            $this->app_id = $app_id;
        }
        if (is_string($api_base) && '' !== $api_base) {
            // Normalise to exactly one trailing slash; request() appends the path directly.
            $this->api_base = rtrim($api_base, '/').'/';
        }
    }

    /**
     * POSTs $data (plus the application ID) to an endpoint and decodes the JSON reply.
     *
     * @param string|null $path    Endpoint path relative to the API base; surrounding slashes are ignored.
     * @param array       $data    Request parameters.
     * @param bool        $is_json Send the parameters as a JSON body instead of a form-encoded body.
     *
     * @return mixed Decoded JSON (objects as associative arrays) on HTTP 200, null on any failure.
     */
    private function request($path = null, $data = array(), $is_json = false)
    {
        $data['app_id'] = $this->app_id;
        // Cast first: passing null to trim() is deprecated on PHP 8.1+.
        $url = $this->api_base.trim((string)$path, '/');

        $headers = null;
        if ($is_json) {
            $body = json_encode($data);
            if (false === $body) {
                error_log('gooAPI: could not JSON-encode the request for '.$url);
                return null;
            }
            $headers = array(
                'Content-Type: application/json',
                // strlen() counts bytes, which is what Content-Length requires.
                'Content-Length: '.strlen($body),
            );
        } else {
            $body = http_build_query($data);
        }

        $ch = curl_init($url);
        if (false === $ch) {
            error_log('gooAPI: could not initialise cURL for '.$url);
            return null;
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 20);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        if (null !== $headers) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        $response = curl_exec($ch);
        $info = curl_getinfo($ch);
        $error = curl_error($ch);
        curl_close($ch);

        $status = isset($info['http_code']) ? (int)$info['http_code'] : 0;
        if (200 === $status && is_string($response)) {
            return json_decode($response, true);
        }

        // Report the failure through the error log instead of a debug dump so that
        // callers simply observe "no result".
        error_log(sprintf(
            'gooAPI: request to %s failed (HTTP %d%s)',
            $url,
            $status,
            '' !== $error ? ', '.$error : ''
        ));
        return null;
    }

    /**
     * Removes carriage returns, line feeds and tabs from a string.
     *
     * @param string $text
     *
     * @return string
     */
    private function stripControlChars($text)
    {
        return str_replace(array("\r", "\n", "\t"), '', $text);
    }

    /**
     * Morphological analysis API.
     *
     * @param string|null $sentence    Text to analyse; CR, LF and TAB are removed before sending.
     * @param string|null $info_filter Sent as the `info_filter` request parameter.
     * @param string|null $pos_filter  Sent as the `pos_filter` request parameter.
     *
     * @return array|false|null The `word_list` field of the response; false for an empty or
     *                          non-string sentence; null when the field is missing or empty.
     */
    public function morph($sentence = null, $info_filter = null, $pos_filter = null)
    {
        if (empty($sentence) || !is_string($sentence)) {
            return false;
        }
        $data = $this->request('morph', array(
            'info_filter' => $info_filter,
            'pos_filter' => $pos_filter,
            'sentence' => $this->stripControlChars($sentence),
        ));
        if (is_array($data) && !empty($data['word_list'])) {
            return $data['word_list'];
        }
        return null;
    }

    /**
     * Runs morph() restricted to the main parts of speech and condenses the result.
     *
     * @param string|null $sentence Text to analyse.
     *
     * @return array|null List of array('word' => ..., 'kana' => ..., 'type' => ...) entries,
     *                    or null when morph() produced nothing.
     */
    public function morphSimple($sentence = null)
    {
        $data = $this->morph($sentence, null, '名詞|名詞接尾辞|動詞語幹|動詞活用語尾|動詞接尾辞|動詞活用語尾|連用詞|判定詞|Alphabet');
        if (empty($data)) {
            return null;
        }
        return $this->summarizeWordList($data);
    }

    /**
     * Condenses a morph() word list: consecutive verb parts become one verb entry and a
     * noun suffix is glued onto the entry directly before it.
     *
     * Entries are keyed by surface form while being collected, so a word that occurs more
     * than once is reported once (the later occurrence wins, the first position is kept).
     *
     * @param array $word_list Sentences, each a list of tokens read as
     *                         array(surface, part of speech, reading) by index 0, 1, 2.
     *
     * @return array Re-indexed list of word/kana/type entries.
     */
    private function summarizeWordList($word_list)
    {
        $words = array();
        foreach ($word_list as $tokens) {
            if (!is_array($tokens)) {
                continue;
            }
            $verb_word = $verb_kana = '';
            // Key of the entry a following noun suffix may attach to. Reset per sentence
            // and by every verb so a suffix never lands on a non-adjacent noun.
            $before = null;
            foreach ($tokens as $val) {
                if (!is_array($val) || count($val) < 3) {
                    continue;
                }
                if (false !== mb_strpos($val[1], '動詞', 0, 'UTF-8')) {
                    $verb_word .= $val[0];
                    $verb_kana .= $val[2];
                    $before = null;
                    continue;
                }

                // A non-verb token ends the verb that was being collected.
                $words = $this->addVerb($words, $verb_word, $verb_kana);
                $verb_word = $verb_kana = '';

                if (null !== $before && '名詞接尾辞' === $val[1]) {
                    $words[$before]['word'] .= $val[0];
                    $words[$before]['kana'] .= $val[2];
                } elseif ('判定詞' !== $val[1]) {
                    // Copula tokens only act as separators and are not reported.
                    $words[$val[0]] = array(
                        'word' => $val[0],
                        'kana' => $val[2],
                        'type' => $val[1],
                    );
                    $before = $val[0];
                }
            }
            // Flush at the end of every sentence so verbs of two sentences are never merged.
            $words = $this->addVerb($words, $verb_word, $verb_kana);
        }
        return array_values($words);
    }

    /**
     * Returns $words with a verb entry added under its surface form; a no-op for an empty verb.
     *
     * @param array  $words Entries collected so far, keyed by surface form.
     * @param string $word  Concatenated surface form of the verb.
     * @param string $kana  Concatenated reading of the verb.
     *
     * @return array
     */
    private function addVerb($words, $word, $kana)
    {
        if ('' !== $word) {
            $words[$word] = array(
                'word' => $word,
                'kana' => $kana,
                'type' => '動詞',
            );
        }
        return $words;
    }

    /**
     * Shared implementation of toHiragana() and toKatakana().
     *
     * @param mixed  $sentence    Text to convert.
     * @param string $output_type Value of the `output_type` request parameter.
     *
     * @return mixed The `converted` field of the response; false for an empty sentence;
     *               null when the field is missing or empty.
     */
    private function convertKana($sentence, $output_type)
    {
        if (empty($sentence)) {
            return false;
        }
        $data = $this->request('hiragana', array(
            'output_type' => $output_type,
            'sentence' => $sentence,
        ));
        if (is_array($data) && !empty($data['converted'])) {
            return $data['converted'];
        }
        return null;
    }

    /**
     * Hiragana conversion API.
     *
     * @param string|null $sentence Text to convert.
     *
     * @return mixed Converted text, false for an empty sentence, null when nothing was returned.
     */
    public function toHiragana($sentence = null)
    {
        return $this->convertKana($sentence, 'hiragana');
    }

    /**
     * Katakana conversion API (same endpoint as toHiragana(), different output type).
     *
     * @param string|null $sentence Text to convert.
     *
     * @return mixed Converted text, false for an empty sentence, null when nothing was returned.
     */
    public function toKatakana($sentence = null)
    {
        return $this->convertKana($sentence, 'katakana');
    }

    /**
     * Named-entity extraction API.
     *
     * @param string|null $sentence     Text to analyse; CR, LF and TAB are removed before sending.
     * @param string|null $class_filter Sent as the `class_filter` request parameter.
     *
     * @return array|false|null The `ne_list` field of the response; false for an empty or
     *                          non-string sentence; null when the field is missing or empty.
     */
    public function entity($sentence = null, $class_filter = null)
    {
        if (empty($sentence) || !is_string($sentence)) {
            return false;
        }
        $data = $this->request('entity', array(
            'class_filter' => $class_filter,
            'sentence' => $this->stripControlChars($sentence),
        ));
        if (is_array($data) && !empty($data['ne_list'])) {
            return $data['ne_list'];
        }
        return null;
    }

    /**
     * Phrase similarity API.
     *
     * @param string|null $word1 First phrase; CR, LF and TAB are removed before sending.
     * @param string|null $word2 Second phrase; CR, LF and TAB are removed before sending.
     *
     * @return int|false|null The response's `score` multiplied by 100 and truncated to an
     *                        integer; false when either phrase is empty or not a string;
     *                        null when the request failed or carried no score.
     */
    public function similarity($word1 = null, $word2 = null)
    {
        if (empty($word1) || !is_string($word1) || empty($word2) || !is_string($word2)) {
            return false;
        }
        $data = $this->request('similarity', array(
            'query_pair' => array(
                $this->stripControlChars($word1),
                $this->stripControlChars($word2),
            ),
        ), true);
        // request() yields null on failure; array_key_exists() on null is a TypeError on PHP 8.
        if (is_array($data) && array_key_exists('score', $data)) {
            return (int)($data['score'] * 100);
        }
        return null;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment