Skip to content

Instantly share code, notes, and snippets.

@jprante
Created December 8, 2012 11:47
Show Gist options
  • Save jprante/4239954 to your computer and use it in GitHub Desktop.
Save jprante/4239954 to your computer and use it in GitHub Desktop.
Ein schöner Tag in Köln im Café an der Straßenecke
# Demo: (re)create the "ling" index on a local Elasticsearch node
# (localhost:9200) with a combined analyzer for German text.
#
# required plugins: analysis-icu, analysis-combo
# ./bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.7.0
# ./bin/plugin -install yakaz/elasticsearch-analysis-combo/1.1.0
#
# Drop any "ling" index left over from a previous run so the settings
# below are applied to a fresh index.
curl -XDELETE 'localhost:9200/ling'

# Index settings, written to settings.json:
#   - analyzer "snow_icu": custom analyzer, icu_tokenizer followed by the
#     "snowball" and "icu_folding" token filters
#   - analyzer "default": combo analyzer over "standard" and "snow_icu"
#   - filter "snowball": snowball filter configured for language German2
# NOTE(review): verify that the combo analyzer honors "filter": "unique";
# check the plugin documentation for a dedicated de-duplication option.
# The heredoc delimiter is quoted so the JSON is written literally.
cat > settings.json <<'JSON'
{
"settings" : {
"index" : {
"analysis" : {
"analyzer" : {
"snow_icu" : {
"type" : "custom",
"tokenizer" : "icu_tokenizer",
"filter" : [ "snowball", "icu_folding" ]
},
"default" : {
"type" : "combo",
"sub_analyzers" : [ "standard", "snow_icu" ],
"filter" : "unique"
}
},
"filter" : {
"snowball" : {
"type" : "snowball",
"language" : "German2"
}
}
}
}
}
}
JSON

# Create the index. curl labels --data-binary bodies as
# application/x-www-form-urlencoded unless told otherwise, so declare the
# body as JSON explicitly (newer Elasticsearch releases reject requests
# whose Content-Type does not match the body).
curl -XPUT 'localhost:9200/ling' \
  -H 'Content-Type: application/json' \
  --data-binary @settings.json

# Print the settings the index actually ended up with.
curl -XGET 'localhost:9200/ling/_settings?pretty'
# Three spellings of the same German sentence, one document each:
#   ling-1.json: original spelling (umlauts, accent, sharp s)
#   ling-2.json: umlauts expanded (oe), accent and sharp s kept
#   ling-3.json: plain ASCII (umlauts and accent dropped, sharp s as "ss")
cat > ling-1.json <<'JSON'
{
"sentence" : "Ein schöner Tag in Köln im Café an der Straßenecke"
}
JSON
cat > ling-2.json <<'JSON'
{
"sentence" : "Ein schoener Tag in Koeln im Café an der Straßenecke"
}
JSON
cat > ling-3.json <<'JSON'
{
"sentence" : "Ein schoner Tag in Koln im Cafe an der Strassenecke"
}
JSON

# Store each file as document <id> of type "test" in the "ling" index.
# curl labels --data-binary bodies as application/x-www-form-urlencoded
# unless told otherwise, so declare the body as JSON explicitly (newer
# Elasticsearch releases reject requests with a mismatched Content-Type).
for doc_id in 1 2 3; do
  curl -XPUT "localhost:9200/ling/test/${doc_id}" \
    -H 'Content-Type: application/json' \
    --data-binary "@ling-${doc_id}.json"
done

# Refresh so the documents indexed above are visible to the searches
# that follow.
curl -XGET 'localhost:9200/_refresh'
# Write the four match queries against the "sentence" field, one file each.
# Heredoc delimiters are quoted so the JSON is written literally.

# query-1.json: original spelling with German umlauts and accents
cat > query-1.json <<'JSON'
{
"query": {
"match": {
"sentence": "Ein schöner Tag in Köln im Café an der Straßenecke"
}
}
}
JSON

# query-2.json: umlauts written in their expanded form (oe)
cat > query-2.json <<'JSON'
{
"query": {
"match": {
"sentence": "Ein schoener Tag in Koeln im Café an der Straßenecke"
}
}
}
JSON

# query-3.json: base forms without diacritics, sharp s folded to "ss"
cat > query-3.json <<'JSON'
{
"query": {
"match": {
"sentence": "Ein schoner Tag in Koln im Cafe an der Strassenecke"
}
}
}
JSON

# query-4.json: single term, to compensate for snowball overstemming.
# Author's note: snow_icu alone gives 1 hit, while the combo analyzer
# (standard + snow_icu) gives 3 hits.
cat > query-4.json <<'JSON'
{
"query": {
"match": {
"sentence": "cafe"
}
}
}
JSON
# Run the four queries (query-1.json .. query-4.json) against ling/test.
# Expected per the author's note: 4 x 3 hits, i.e. every query returns
# three hits.
# curl labels --data-binary bodies as application/x-www-form-urlencoded
# unless told otherwise, so declare the body as JSON explicitly (newer
# Elasticsearch releases reject requests with a mismatched Content-Type).
for query_no in 1 2 3 4; do
  curl -XPOST 'localhost:9200/ling/test/_search?pretty' \
    -H 'Content-Type: application/json' \
    --data-binary "@query-${query_no}.json"
done
# Stop here; the exit status is that of the last curl call.
exit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment