Last active
August 29, 2015 14:05
-
-
Save konradkonrad/ca17107192d632f45878 to your computer and use it in GitHub Desktop.
multi-language index mappings via templates POC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Proof of concept: use index templates and dynamic_templates to define a
# 'DRY' multi-language index mapping (one index per language).
# The 'general' template defines all field mappings (mostly via
# dynamic_templates) and names their analyzers, which are left unspecified
# at that level.
# The language templates (e.g. 'de') instantiate the language-specific
# analyzers and match indices by their language suffix.
#
# Environment:
#   ES  base URL of the Elasticsearch node; defaults to http://localhost:9200

# The base URL is stored without a trailing slash because every request below
# appends "/<path>"; a trailing slash would yield "//" in the request URL.
# printf is used instead of echo because echo's handling of "\n" differs
# between /bin/sh implementations.
if [ -z "${ES:-}" ]; then
  ES="http://localhost:9200"
  printf "\$ES variable not set - using default value: '%s'\n\n" "$ES"
else
  ES="${ES%/}"
  printf "You have set the ES environment variable to: '%s'\n\n" "$ES"
fi
# Recreate the language-agnostic 'general' template. It applies to indices
# matching "myindex*" with order 0 and maps every "*_lang" field to the
# analyzers "ana_index_ngram" / "ana_search", which are referenced here but
# not defined by this template. Only "ana_index_nolang" is defined here.
# "${ES%/}" drops one trailing slash from $ES so the URL never contains "//".
printf "\nDelete and create the 'general' template\n"
curl -XDELETE "${ES%/}/_template/general"
curl -XPUT "${ES%/}/_template/general" -d '{
  "template": "myindex*",
  "order": 0,
  "settings": {
    "number_of_shards": 1,
    "index" : {
      "analysis" : {
        "analyzer" : {
          "ana_index_nolang" : {
            "filter" : [ "trim", "lowercase", "asciifolding"],
            "tokenizer" : "standard"
          }
        }
      }
    }
  },
  "mappings": {
    "document": {
      "_source": {
        "enabled": true
      },
      "dynamic_templates": [
        {
          "string_ngram_lang": {
            "match": "*_lang",
            "mapping": {
              "type": "string",
              "index_analyzer": "ana_index_ngram",
              "search_analyzer": "ana_search",
              "fields": {
                "simple": {
                  "type": "string",
                  "analyzer": "ana_index_nolang"
                }
              }
            }
          }
        }],
      "properties" : {
        "boost" : {
          "type" : "double"
        }
      }
    }
  }
}'
# Recreate the German 'de' template. It applies to indices matching "*_de"
# with order 1 and defines the analyzers "ana_index_ngram" and "ana_search"
# together with the token filters they use (stopwords, German stemmer,
# 1..30 n-grams).
# "${ES%/}" drops one trailing slash from $ES so the URL never contains "//".
printf "\nDelete and create the 'de' template\n"
curl -XDELETE "${ES%/}/_template/de"
curl -XPUT "${ES%/}/_template/de" -d '{
  "template": "*_de",
  "order": 1,
  "settings": {
    "index" : {
      "analysis" : {
        "analyzer" : {
          "ana_index_ngram" : {
            "filter" : [ "trim", "lowercase", "stopwords", "stemmer", "asciifolding", "ngram1_30" ],
            "tokenizer" : "standard"
          },
          "ana_search": {
            "filter": [ "trim", "lowercase", "stopwords", "stemmer", "asciifolding" ],
            "tokenizer": "standard"
          }
        },
        "filter": {
          "stopwords": {
            "type": "stop",
            "stopwords": ["mit", "auf", "..."]
          },
          "stemmer": {
            "type": "stemmer",
            "name": "german"
          },
          "ngram1_30": {
            "type": "nGram",
            "min_gram": 1,
            "max_gram": 30
          }
        }
      }
    }
  }
}'
# Drop any existing 'myindex_de' index, then index one document of type
# 'document' with id 1 into it. The index name matches both template
# patterns used by this script ("myindex*" and "*_de").
# "${ES%/}" drops one trailing slash from $ES so the URL never contains "//".
printf "\nDelete index 'myindex_de'\n"
curl -XDELETE "${ES%/}/myindex_de"
printf "\nCreate index by putting a 'document'\n"
curl -XPUT "${ES%/}/myindex_de/document/1" -d '{
  "myfield_lang": "Der Habicht, das Lamm, Hurz!",
  "boost": 9
}'
# Finish with a newline after the last curl response.
echo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment