Skip to content

Instantly share code, notes, and snippets.

@inouetakuya
Last active August 29, 2015 14:10
Show Gist options
  • Save inouetakuya/d64fffb878ca906bd67a to your computer and use it in GitHub Desktop.
Save inouetakuya/d64fffb878ca906bd67a to your computer and use it in GitHub Desktop.
extended analyze プラグインで解析した結果
// $ curl -XPOST 'localhost:9200/_extended_analyze?analyzer=kuromoji&pretty' -d '絶対に手を出してはいけない相手を夜這いしちゃった俺'
{
"custom_analyzer" : false,
"analyzer" : {
"kuromoji" : [ {
"token" : "絶対",
"start_offset" : 0,
"end_offset" : 2,
"type" : "word",
"position" : 1,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : null,
"inflectionType" : null,
"inflectionForm (en)" : null,
"inflectionForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "noun-adverbial",
"partOfSpeech" : "名詞-副詞可能"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "zettai",
"reading" : "ゼッタイ",
"pronunciation (en)" : "zettai",
"pronunciation" : "ゼッタイ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e7 b5 b6 e5 af be]"
}
}
}, {
"token" : "手",
"start_offset" : 3,
"end_offset" : 4,
"type" : "word",
"position" : 3,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : null,
"inflectionType" : null,
"inflectionForm (en)" : null,
"inflectionForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "noun-common",
"partOfSpeech" : "名詞-一般"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "te",
"reading" : "テ",
"pronunciation (en)" : "te",
"pronunciation" : "テ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e6 89 8b]"
}
}
}, {
"token" : "出す",
"start_offset" : 5,
"end_offset" : 7,
"type" : "word",
"position" : 5,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : "出す"
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : "5-row-cons-s",
"inflectionType" : "五段・サ行",
"inflectionForm (en)" : "conjunctive",
"inflectionForm" : "連用形"
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "verb-main",
"partOfSpeech" : "動詞-自立"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "dashi",
"reading" : "ダシ",
"pronunciation (en)" : "dashi",
"pronunciation" : "ダシ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e5 87 ba e3 81 99]"
}
}
}, {
"token" : "いける",
"start_offset" : 9,
"end_offset" : 11,
"type" : "word",
"position" : 8,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : "いける"
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : "1-row",
"inflectionType" : "一段",
"inflectionForm (en)" : "imperfective",
"inflectionForm" : "未然形"
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "verb-auxiliary",
"partOfSpeech" : "動詞-非自立"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "ike",
"reading" : "イケ",
"pronunciation (en)" : "ike",
"pronunciation" : "イケ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e3 81 84 e3 81 91 e3 82 8b]"
}
}
}, {
"token" : "相手",
"start_offset" : 13,
"end_offset" : 15,
"type" : "word",
"position" : 10,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : null,
"inflectionType" : null,
"inflectionForm (en)" : null,
"inflectionForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "noun-common",
"partOfSpeech" : "名詞-一般"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "aite",
"reading" : "アイテ",
"pronunciation (en)" : "aite",
"pronunciation" : "アイテ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e7 9b b8 e6 89 8b]"
}
}
}, {
"token" : "夜這い",
"start_offset" : 16,
"end_offset" : 19,
"type" : "word",
"position" : 12,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : null,
"inflectionType" : null,
"inflectionForm (en)" : null,
"inflectionForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "noun-common",
"partOfSpeech" : "名詞-一般"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "yobai",
"reading" : "ヨバイ",
"pronunciation (en)" : "yobai",
"pronunciation" : "ヨバイ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e5 a4 9c e9 80 99 e3 81 84]"
}
}
}, {
"token" : "ちゃう",
"start_offset" : 20,
"end_offset" : 23,
"type" : "word",
"position" : 14,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : "ちゃう"
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : "5-row-cons-w-cons-onbin",
"inflectionType" : "五段・ワ行促音便",
"inflectionForm (en)" : "conjunctive-ta-connection",
"inflectionForm" : "連用タ接続"
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "verb-auxiliary",
"partOfSpeech" : "動詞-非自立"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "cha",
"reading" : "チャッ",
"pronunciation (en)" : "cha",
"pronunciation" : "チャッ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e3 81 a1 e3 82 83 e3 81 86]"
}
}
}, {
"token" : "俺",
"start_offset" : 24,
"end_offset" : 25,
"type" : "word",
"position" : 16,
"extended_attributes" : {
"org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute" : {
"baseForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute" : {
"inflectionType (en)" : null,
"inflectionType" : null,
"inflectionForm (en)" : null,
"inflectionForm" : null
},
"org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : {
"partOfSpeech (en)" : "noun-pronoun-misc",
"partOfSpeech" : "名詞-代名詞-一般"
},
"org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : {
"reading (en)" : "ore",
"reading" : "オレ",
"pronunciation (en)" : "ore",
"pronunciation" : "オレ"
},
"org.apache.lucene.analysis.tokenattributes.KeywordAttribute" : {
"keyword" : false
},
"org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute" : {
"positionLength" : 1
},
"org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute" : {
"bytes" : "[e4 bf ba]"
}
}
} ]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment