Last active
August 29, 2015 14:10
-
-
Save inouetakuya/d64fffb878ca906bd67a to your computer and use it in GitHub Desktop.
extended analyze プラグインで解析した結果
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// $ curl -XPOST 'localhost:9200/_extended_analyze?analyzer=kuromoji&pretty' -d '絶対に手を出してはいけない相手を夜這いしちゃった俺' | |
{ | |
"custom_analyzer" : false, | |
"analyzer" : { | |
"kuromoji" : [ { | |
"token" : "絶対", | |
"start_offset" : 0, | |
"end_offset" : 2, | |
"type" : "word", | |
"position" : 1, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : null, | |
"inflectionType" : null, | |
"inflectionForm (en)" : null, | |
"inflectionForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "noun-adverbial", | |
"partOfSpeech" : "名詞-副詞可能" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "zettai", | |
"reading" : "ゼッタイ", | |
"pronunciation (en)" : "zettai", | |
"pronunciation" : "ゼッタイ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e7 b5 b6 e5 af be]" | |
} | |
} | |
}, { | |
"token" : "手", | |
"start_offset" : 3, | |
"end_offset" : 4, | |
"type" : "word", | |
"position" : 3, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : null, | |
"inflectionType" : null, | |
"inflectionForm (en)" : null, | |
"inflectionForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "noun-common", | |
"partOfSpeech" : "名詞-一般" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "te", | |
"reading" : "テ", | |
"pronunciation (en)" : "te", | |
"pronunciation" : "テ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e6 89 8b]" | |
} | |
} | |
}, { | |
"token" : "出す", | |
"start_offset" : 5, | |
"end_offset" : 7, | |
"type" : "word", | |
"position" : 5, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : "出す" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : "5-row-cons-s", | |
"inflectionType" : "五段・サ行", | |
"inflectionForm (en)" : "conjunctive", | |
"inflectionForm" : "連用形" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "verb-main", | |
"partOfSpeech" : "動詞-自立" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "dashi", | |
"reading" : "ダシ", | |
"pronunciation (en)" : "dashi", | |
"pronunciation" : "ダシ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e5 87 ba e3 81 99]" | |
} | |
} | |
}, { | |
"token" : "いける", | |
"start_offset" : 9, | |
"end_offset" : 11, | |
"type" : "word", | |
"position" : 8, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : "いける" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : "1-row", | |
"inflectionType" : "一段", | |
"inflectionForm (en)" : "imperfective", | |
"inflectionForm" : "未然形" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "verb-auxiliary", | |
"partOfSpeech" : "動詞-非自立" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "ike", | |
"reading" : "イケ", | |
"pronunciation (en)" : "ike", | |
"pronunciation" : "イケ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e3 81 84 e3 81 91 e3 82 8b]" | |
} | |
} | |
}, { | |
"token" : "相手", | |
"start_offset" : 13, | |
"end_offset" : 15, | |
"type" : "word", | |
"position" : 10, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : null, | |
"inflectionType" : null, | |
"inflectionForm (en)" : null, | |
"inflectionForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "noun-common", | |
"partOfSpeech" : "名詞-一般" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "aite", | |
"reading" : "アイテ", | |
"pronunciation (en)" : "aite", | |
"pronunciation" : "アイテ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e7 9b b8 e6 89 8b]" | |
} | |
} | |
}, { | |
"token" : "夜這い", | |
"start_offset" : 16, | |
"end_offset" : 19, | |
"type" : "word", | |
"position" : 12, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : null, | |
"inflectionType" : null, | |
"inflectionForm (en)" : null, | |
"inflectionForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "noun-common", | |
"partOfSpeech" : "名詞-一般" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "yobai", | |
"reading" : "ヨバイ", | |
"pronunciation (en)" : "yobai", | |
"pronunciation" : "ヨバイ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e5 a4 9c e9 80 99 e3 81 84]" | |
} | |
} | |
}, { | |
"token" : "ちゃう", | |
"start_offset" : 20, | |
"end_offset" : 23, | |
"type" : "word", | |
"position" : 14, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : "ちゃう" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : "5-row-cons-w-cons-onbin", | |
"inflectionType" : "五段・ワ行促音便", | |
"inflectionForm (en)" : "conjunctive-ta-connection", | |
"inflectionForm" : "連用タ接続" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "verb-auxiliary", | |
"partOfSpeech" : "動詞-非自立" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "cha", | |
"reading" : "チャッ", | |
"pronunciation (en)" : "cha", | |
"pronunciation" : "チャッ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e3 81 a1 e3 82 83 e3 81 86]" | |
} | |
} | |
}, { | |
"token" : "俺", | |
"start_offset" : 24, | |
"end_offset" : 25, | |
"type" : "word", | |
"position" : 16, | |
"extended_attributes" : { | |
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : { | |
"baseForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : { | |
"inflectionType (en)" : null, | |
"inflectionType" : null, | |
"inflectionForm (en)" : null, | |
"inflectionForm" : null | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { | |
"partOfSpeech (en)" : "noun-pronoun-misc", | |
"partOfSpeech" : "名詞-代名詞-一般" | |
}, | |
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { | |
"reading (en)" : "ore", | |
"reading" : "オレ", | |
"pronunciation (en)" : "ore", | |
"pronunciation" : "オレ" | |
}, | |
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : { | |
"keyword" : false | |
}, | |
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : { | |
"positionLength" : 1 | |
}, | |
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : { | |
"bytes" : "[e4 bf ba]" | |
} | |
} | |
} ] | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment