Skip to content

Instantly share code, notes, and snippets.

@konradkonrad
Last active August 29, 2015 14:05
Show Gist options
  • Save konradkonrad/ca17107192d632f45878 to your computer and use it in GitHub Desktop.
Save konradkonrad/ca17107192d632f45878 to your computer and use it in GitHub Desktop.
multi-language index mappings via templates POC
#!/bin/sh
# This tries to use index templates and dynamic_templates in order to define a
# 'DRY' multi language index-mapping (index per language).
# The 'general' template defines all field mappings (mostly via dynamic_templates) and
# their analyzers (which are underspecified on this level).
# The language templates (e.g. 'de') instantiate the language-specific analyzers
# and match the language suffix.
# Resolve the Elasticsearch base URL.
# Reads:  ES (environment) - base URL of the node; optional.
# Writes: ES - the URL used by all subsequent curl calls, without a
#         trailing slash so that "$ES/_template/..." never contains "//".
# printf is used instead of echo because backslash escapes in echo are not
# portable between /bin/sh implementations.
if [ -z "$ES" ]; then
  ES="http://localhost:9200"
  printf "\$ES variable not set - using default value: '%s'\n\n" "$ES"
else
  printf "You have set the ES environment variable to: '%s'\n\n" "$ES"
fi
# Normalize a user-supplied value such as "http://host:9200/".
ES=${ES%/}
# (Re)create the 'general' index template.
# It applies to every index matching "myindex*" (order 0) and holds the
# language-independent parts: one shard, the 'ana_index_nolang' analyzer, a
# dynamic template mapping all "*_lang" fields to the analyzers
# 'ana_index_ngram' / 'ana_search' (not defined here; the higher-order
# language templates are expected to supply them), and the 'boost' property.
# "${ES%/}" drops a trailing slash from the base URL to avoid "//" in the path;
# the expansion is quoted so an unusual URL cannot be word-split or globbed.
printf "\nDelete and create the 'general' template\n"
curl -XDELETE "${ES%/}/_template/general"
# The body is JSON, so say so explicitly instead of relying on curl's
# default form-encoded Content-Type for -d.
curl -XPUT "${ES%/}/_template/general" -H 'Content-Type: application/json' -d '{
"template": "myindex*",
"order": 0,
"settings": {
"number_of_shards": 1,
"index" : {
"analysis" : {
"analyzer" : {
"ana_index_nolang" : {
"filter" : [ "trim", "lowercase", "asciifolding"],
"tokenizer" : "standard"
}
}
}
}
},
"mappings": {
"document": {
"_source": {
"enabled": true
},
"dynamic_templates": [
{
"string_ngram_lang": {
"match": "*_lang",
"mapping": {
"type": "string",
"index_analyzer": "ana_index_ngram",
"search_analyzer": "ana_search",
"fields": {
"simple": {
"type": "string",
"analyzer": "ana_index_nolang"
}
}
}
}
}],
"properties" : {
"boost" : {
"type" : "double"
}
}
}
}
}'
# (Re)create the 'de' index template.
# It applies to every index matching "*_de" (order 1) and defines the two
# analyzers 'ana_index_ngram' and 'ana_search' with German-specific filters
# (stop words, German stemmer) plus a 1..30 nGram filter for indexing.
# NOTE(review): the "..." entry in the stopwords list looks like a placeholder
# for the real list - confirm before using this beyond a proof of concept.
# "${ES%/}" drops a trailing slash from the base URL to avoid "//" in the path;
# the expansion is quoted so an unusual URL cannot be word-split or globbed.
printf "\nDelete and create the 'de' template\n"
curl -XDELETE "${ES%/}/_template/de"
# The body is JSON, so say so explicitly instead of relying on curl's
# default form-encoded Content-Type for -d.
curl -XPUT "${ES%/}/_template/de" -H 'Content-Type: application/json' -d '{
"template": "*_de",
"order": 1,
"settings": {
"index" : {
"analysis" : {
"analyzer" : {
"ana_index_ngram" : {
"filter" : [ "trim", "lowercase", "stopwords", "stemmer", "asciifolding", "ngram1_30" ],
"tokenizer" : "standard"
},
"ana_search": {
"filter": [ "trim", "lowercase", "stopwords", "stemmer", "asciifolding" ],
"tokenizer": "standard"
}
},
"filter": {
"stopwords": {
"type": "stop",
"stopwords": ["mit", "auf", "..."]
},
"stemmer": {
"type": "stemmer",
"name": "german"
},
"ngram1_30": {
"type": "nGram",
"min_gram": 1,
"max_gram": 30
}
}
}
}
}
}'
# Drop the index 'myindex_de' and recreate it implicitly by indexing one
# document of type 'document' with id 1. The index name matches both template
# patterns used in this script ("myindex*" and "*_de").
# "${ES%/}" drops a trailing slash from the base URL to avoid "//" in the path;
# the expansion is quoted so an unusual URL cannot be word-split or globbed.
printf "\nDelete index 'myindex_de'\n"
curl -XDELETE "${ES%/}/myindex_de"
printf "\nCreate index by putting a 'document'\n"
# The body is JSON, so say so explicitly instead of relying on curl's
# default form-encoded Content-Type for -d.
curl -XPUT "${ES%/}/myindex_de/document/1" -H 'Content-Type: application/json' -d '{
"myfield_lang": "Der Habicht, das Lamm, Hurz!",
"boost": 9
}'
# Terminate curl's output with a newline.
echo
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment