Created
January 15, 2013 08:02
-
-
Save purem/4537084 to your computer and use it in GitHub Desktop.
Demonstrates two unwanted search results with the current Elasticsearch setup.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ========================================
# Testing n-gram analysis in ElasticSearch
# ========================================
# NOTE(review): no document-indexing step is visible in this script, yet the
# recorded responses further down list documents; presumably they were indexed
# separately -- confirm before relying on the results.

# Drop any existing "courses" index before it is re-created.
curl -X DELETE localhost:9200/courses
# Create the "courses" index.
#   mappings: the "name" field of type "course" is indexed with
#             "autocomplete_analyzer" and searched with the "keyword" analyzer.
#   settings: one shard, one replica, plus the custom analysis chain:
#     autocomplete_edgeNGram - front edge n-grams of 2 to 10 characters
#     word_filter            - word_delimiter with all splitting options off
#                              and preserve_original on
#     autocomplete_analyzer  - standard tokenizer, then asciifolding,
#                              lowercase, word_filter, autocomplete_edgeNGram
curl -X POST http://localhost:9200/courses -d '{
  "mappings": {
    "course": {
      "properties": {
        "name": {
          "type": "string",
          "index_analyzer": "autocomplete_analyzer",
          "search_analyzer": "keyword"
        }
      }
    }
  },
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {
      "filter": {
        "autocomplete_edgeNGram": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 10,
          "side": "front"
        },
        "word_filter": {
          "type": "word_delimiter",
          "generate_word_parts": false,
          "generate_number_parts": false,
          "split_on_numerics": false,
          "split_on_case_change": false,
          "preserve_original": true
        }
      },
      "analyzer": {
        "autocomplete_analyzer": {
          "tokenizer": "standard",
          "filter": [
            "asciifolding",
            "lowercase",
            "word_filter",
            "autocomplete_edgeNGram"
          ],
          "type": "custom"
        }
      }
    }
  }
}'
# Test basic ngram functionality, gets correct course
# Searches the "name" field for the prefix "merc".
# NOTE(review): the URL asks for size=10 while the body asks for "size":4 --
# confirm which limit is intended.
curl -X GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' -d '{
  "query": {
    "query_string": {
      "query": "merc",
      "fields": [
        "name"
      ]
    }
  },
  "size": 4
}'
# Test with full keyword, gets correct course
# Searches the "name" field for the complete word "mercedes" (previously this
# test repeated the "merc" prefix query verbatim, which did not exercise a
# full keyword at all).
curl -X GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' -d '{
  "query": {
    "query_string": {
      "query": "mercedes",
      "fields": [
        "name"
      ]
    }
  },
  "size": 4
}'
# Test with full subject without hyphen, gets correct course
# Searches the "name" field for the two space-separated words "mercedes benz".
curl -X GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' -d '{
  "query": {
    "query_string": {
      "query": "mercedes benz",
      "fields": [
        "name"
      ]
    }
  },
  "size": 4
}'
# Test with full subject with hyphen, FAILS to get correct course (No results)
# NOTE(review): presumably the "keyword" search analyzer keeps "mercedes-benz"
# as a single term, while the "standard" tokenizer used at index time splits
# on the hyphen, so no such term exists in the index -- confirm with the
# _analyze API.
curl -X GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' -d '{
  "query": {
    "query_string": {
      "query": "mercedes-benz",
      "fields": [
        "name"
      ]
    }
  },
  "size": 4
}'
# Recorded response:
# {
#   "took" : 1,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 1,
#     "successful" : 1,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 0,
#     "max_score" : null,
#     "hits" : [ ]
#   }
# }%
# Test with full subject with hyphen, gets extra unwanted result
# NOTE(review): this request is identical to the preceding hyphenated test,
# yet the recorded response below differs (2 hits instead of 0); presumably it
# was captured under a different analyzer configuration -- TODO confirm.
curl -X GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' -d '{
  "query": {
    "query_string": {
      "query": "mercedes-benz",
      "fields": [
        "name"
      ]
    }
  },
  "size": 4
}'
# Recorded response (the second hit, _id 13, is the unwanted one):
# {
#   "took" : 1,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 1,
#     "successful" : 1,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 2,
#     "max_score" : 0.3592204,
#     "hits" : [ {
#       "_index" : "courses",
#       "_type" : "course",
#       "_id" : "7",
#       "_score" : 0.3592204, "_source" : {"name":"Mercedes-Benz Driving School"}
#     }, {
#       "_index" : "courses",
#       "_type" : "course",
#       "_id" : "13",
#       "_score" : 0.3592204, "_source" : {"name":"Being Cool With Bond, James Bond"}
#     } ]
#   }
# }%
# Ask Elasticsearch to refresh the "courses" index.
# NOTE(review): this runs after all the searches above; a refresh is normally
# wanted between indexing and searching -- confirm the intended position.
curl -X POST "http://localhost:9200/courses/_refresh"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment