Created
December 8, 2012 11:47
-
-
Save jprante/4239954 to your computer and use it in GitHub Desktop.
Ein schöner Tag in Köln im Café an der Straßenecke
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: analysing German text in Elasticsearch with ICU tokenizing/folding,
# Snowball stemming and the "combo" analyzer.
#
# Required plugins: analysis-icu, analysis-combo
#   ./bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.7.0
#   ./bin/plugin -install yakaz/elasticsearch-analysis-combo/1.1.0
#
# Side effects:
#   - deletes and recreates the index "ling" on localhost:9200
#   - writes settings.json, ling-{1,2,3}.json and query-{1,2,3,4}.json
#     into the current directory

# Drop any "ling" index left over from a previous run.
curl -XDELETE 'localhost:9200/ling'

# Index settings:
#   snow_icu - custom analyzer: ICU tokenizer, then the German2 Snowball
#              filter defined below, then ICU folding
#   default  - combo analyzer merging "standard" and "snow_icu"
cat > settings.json <<'EOF'
{
  "settings" : {
    "index" : {
      "analysis" : {
        "analyzer" : {
          "snow_icu" : {
            "type" : "custom",
            "tokenizer" : "icu_tokenizer",
            "filter" : [ "snowball", "icu_folding" ]
          },
          "default" : {
            "type" : "combo",
            "sub_analyzers" : [ "standard", "snow_icu" ],
            "filter" : "unique"
          }
        },
        "filter" : {
          "snowball" : {
            "type" : "snowball",
            "language" : "German2"
          }
        }
      }
    }
  }
}
EOF
curl -XPUT 'localhost:9200/ling' --data-binary @settings.json
curl -XGET 'localhost:9200/ling/_settings?pretty'

# Three spellings of the same sentence:
#   1: umlauts, accent and sharp s as written
#   2: umlauts expanded (oe), accent and sharp s kept
#   3: plain ASCII base letters, sharp s written as "ss"
cat > ling-1.json <<'EOF'
{
  "sentence" : "Ein schöner Tag in Köln im Café an der Straßenecke"
}
EOF
cat > ling-2.json <<'EOF'
{
  "sentence" : "Ein schoener Tag in Koeln im Café an der Straßenecke"
}
EOF
cat > ling-3.json <<'EOF'
{
  "sentence" : "Ein schoner Tag in Koln im Cafe an der Strassenecke"
}
EOF
curl -XPUT 'localhost:9200/ling/test/1' --data-binary @ling-1.json
curl -XPUT 'localhost:9200/ling/test/2' --data-binary @ling-2.json
curl -XPUT 'localhost:9200/ling/test/3' --data-binary @ling-3.json

# Refresh so the documents just indexed are visible to the searches below.
curl -XGET 'localhost:9200/_refresh'

# german umlauts and accents
cat > query-1.json <<'EOF'
{
  "query": {
    "match": {
      "sentence": "Ein schöner Tag in Köln im Café an der Straßenecke"
    }
  }
}
EOF

# german umlaut expansion
cat > query-2.json <<'EOF'
{
  "query": {
    "match": {
      "sentence": "Ein schoener Tag in Koeln im Café an der Straßenecke"
    }
  }
}
EOF

# base form reduction, sharp s folding
cat > query-3.json <<'EOF'
{
  "query": {
    "match": {
      "sentence": "Ein schoner Tag in Koln im Cafe an der Strassenecke"
    }
  }
}
EOF

# compensate snowball overstemming.
# With snow_icu 1 hit => with combo(standard+snow_icu) 3 hits
cat > query-4.json <<'EOF'
{
  "query": {
    "match": {
      "sentence": "cafe"
    }
  }
}
EOF

# Expected result: 4 queries x 3 hits each.
curl -XPOST 'localhost:9200/ling/test/_search?pretty' --data-binary @query-1.json
curl -XPOST 'localhost:9200/ling/test/_search?pretty' --data-binary @query-2.json
curl -XPOST 'localhost:9200/ling/test/_search?pretty' --data-binary @query-3.json
curl -XPOST 'localhost:9200/ling/test/_search?pretty' --data-binary @query-4.json
exit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment