Last active
November 27, 2018 01:37
-
-
Save kosho/2250bb3e9445786f7c3788541dcee337 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################
# Kuromoji/ICU Tokenizer Comparison Test
##################################################
# Kuromoji user dictionary: config/userdict_ja.txt
# Entry format: <surface form>,<segmentation>,<readings>,<part-of-speech tag>
# ルークスカイウォーカー,ルーク スカイウォーカー,ルーク スカイウォーカー,カスタム名詞
# Recreate the test index from scratch so that analyzer changes always take
# effect. The DELETE returns 404 on the very first run; that is harmless.
DELETE app-search-test

# Create the index with three analysis chains, each exposed as a multi-field
# of `title` so the same text can be compared side by side:
#   title.kuromoji     - built-in `kuromoji` analyzer (no user dictionary)
#   title.my_kuromoji  - kuromoji_tokenizer (search mode) + userdict_ja.txt
#   title.iq_text_base - icu_tokenizer + icu_folding + Japanese stop words
# Requires the analysis-kuromoji and analysis-icu plugins.
#
# `ja-stop-words-filter` uses the kuromoji plugin's `ja_stop` filter with the
# `_japanese_` list. It was previously a plain `stop` filter loaded with
# `_english_`, which never removed any Japanese stop words despite its name.
#
# NOTE(review): `fielddata: true` is presumably enabled so the indexed tokens
# can be inspected via aggregations; it is memory-hungry, so keep it to tests.
PUT app-search-test
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "analysis": {
      "analyzer": {
        "iq_text_base": {
          "tokenizer": "icu_tokenizer",
          "filter": [
            "icu_folding",
            "ja-stop-words-filter"
          ]
        },
        "my_kuromoji": {
          "type": "custom",
          "tokenizer": "my_kuromoji_tokenizer"
        }
      },
      "filter": {
        "ja-stop-words-filter": {
          "type": "ja_stop",
          "stopwords": [
            "_japanese_"
          ]
        }
      },
      "tokenizer": {
        "my_kuromoji_tokenizer": {
          "type": "kuromoji_tokenizer",
          "user_dictionary": "userdict_ja.txt",
          "mode": "search"
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "title": {
          "type": "text",
          "fields": {
            "kuromoji": {
              "type": "text",
              "analyzer": "kuromoji",
              "fielddata": true
            },
            "my_kuromoji": {
              "type": "text",
              "analyzer": "my_kuromoji",
              "fielddata": true
            },
            "iq_text_base": {
              "type": "text",
              "analyzer": "iq_text_base",
              "fielddata": true
            }
          }
        }
      }
    }
  }
}
# Token-by-token breakdown of the compound word using the built-in
# `kuromoji` analyzer (no user dictionary).
GET app-search-test/_analyze
{
  "text": "ルークスカイウォーカー",
  "analyzer": "kuromoji",
  "explain": true
}
# Same text through the custom `my_kuromoji` analyzer, whose tokenizer loads
# userdict_ja.txt and runs in search mode.
GET app-search-test/_analyze
{
  "text": "ルークスカイウォーカー",
  "analyzer": "my_kuromoji",
  "explain": true
}
# Same text through the ICU-based `iq_text_base` analyzer
# (icu_tokenizer followed by icu_folding and the stop-word filter).
GET app-search-test/_analyze
{
  "text": "ルークスカイウォーカー",
  "analyzer": "iq_text_base",
  "explain": true
}
# Search actual document
# Index one sample document. Its title contains a middle dot (・), which the
# search queries below deliberately omit.
# `refresh=true` makes the document visible to search before the request
# returns, so the searches that follow find it even when the whole script is
# executed in one go (otherwise they can race the default 1s refresh).
PUT app-search-test/doc/1?refresh=true
{
  "title": "ルーク・スカイウォーカー"
}
# Match the undotted compound against the sub-field analyzed with the
# built-in `kuromoji` analyzer.
GET app-search-test/_search
{
  "query": {
    "match": {
      "title.kuromoji": {
        "query": "ルークスカイウォーカー"
      }
    }
  }
}
# Match the undotted compound against the sub-field analyzed with the
# user-dictionary-backed `my_kuromoji` analyzer.
GET app-search-test/_search
{
  "query": {
    "match": {
      "title.my_kuromoji": {
        "query": "ルークスカイウォーカー"
      }
    }
  }
}
# Match the undotted compound against the sub-field analyzed with the
# ICU-based `iq_text_base` analyzer.
GET app-search-test/_search
{
  "query": {
    "match": {
      "title.iq_text_base": {
        "query": "ルークスカイウォーカー"
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment