Last active
August 29, 2015 14:08
-
-
Save nono/40bc5a2f6fd5c0fd86f2 to your computer and use it in GitHub Desktop.
I can't understand why Elasticsearch can't find Europe with a snowball analyzer...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
text: | |
- "Western Europe antiquities" | |
- "Antiquités de l'Europe occidentale" | |
analyzer: | |
snowball_en: | |
type: snowball | |
language: English | |
snowball_fr: | |
type: custom | |
tokenizer: standard | |
filter: | |
- standard | |
- lowercase | |
- elision_fr | |
- stop_fr | |
- my_asciifolding | |
- snowball_fr | |
filter: | |
my_asciifolding: | |
type: asciifolding | |
preserve_original: true | |
elision_fr: | |
type: elision | |
articles: | |
- l | |
- d | |
- m | |
stop_fr: | |
type: stop | |
language: _french_ | |
snowball_fr: | |
type: snowball | |
language: French |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
_index: collections | |
_type: collection | |
_id: 6 | |
name: | |
en: "Western Europe antiquities" | |
fr: "Antiquités de l'Europe occidentale" | |
--- | |
_index: collections | |
_type: collection | |
_id: 13 | |
name: | |
en: "Islamic art" | |
fr: "Arts de l'Islam" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
collection: | |
properties: | |
name: | |
properties: | |
en: | |
type: string | |
analyzer: snowball | |
fr: | |
type: string | |
analyzer: snowball_fr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Step 1 of the reproduction: point at an Elasticsearch node and create the
# `collections` index with its analysis settings and the `collection` mapping.

# Base URL of the Elasticsearch node. Defaults to a local node; a value
# already present in the environment takes precedence.
export ELASTICSEARCH_ENDPOINT="${ELASTICSEARCH_ENDPOINT:-http://localhost:9200}"

# Create indexes
#
# The request body is passed through a quoted here-document instead of a
# single-quoted argument: the French sample text contains an apostrophe, and a
# backslash cannot escape a quote inside a single-quoted shell string, so the
# previous `l\'Europe` ended the string early and corrupted the command.
# `<<'JSON'` keeps the body literal (no shell expansion), which also lets the
# apostrophe appear unescaped, as JSON requires.
#
# The `snowball_fr` token filter now uses the key `language`; it was
# previously misspelled `langugage`, so the French setting was never applied.
#
# NOTE(review): the `name.en` mapping refers to an analyzer called `snowball`,
# which is not defined in the settings below (only `snowball_en` is) -
# presumably it resolves to a built-in analyzer; confirm this is intended.
curl -XPUT "$ELASTICSEARCH_ENDPOINT/collections" --data-binary @- <<'JSON'
{
  "settings": {
    "analysis": {
      "text": [
        "Western Europe antiquities",
        "Antiquités de l'Europe occidentale"
      ],
      "analyzer": {
        "snowball_en": {
          "type": "snowball",
          "language": "English"
        },
        "snowball_fr": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "standard",
            "lowercase",
            "elision_fr",
            "stop_fr",
            "my_asciifolding",
            "snowball_fr"
          ]
        }
      },
      "filter": {
        "my_asciifolding": {
          "type": "asciifolding",
          "preserve_original": true
        },
        "elision_fr": {
          "type": "elision",
          "articles": [
            "l",
            "d",
            "m"
          ]
        },
        "stop_fr": {
          "type": "stop",
          "language": "_french_"
        },
        "snowball_fr": {
          "type": "snowball",
          "language": "French"
        }
      }
    }
  },
  "mappings": {
    "collection": {
      "properties": {
        "name": {
          "properties": {
            "en": {
              "type": "string",
              "analyzer": "snowball"
            },
            "fr": {
              "type": "string",
              "analyzer": "snowball_fr"
            }
          }
        }
      }
    }
  }
}
JSON
# Index documents
#
# Sends two `collection` documents (ids 6 and 13) to the bulk endpoint and
# asks for a refresh so they are searchable by the queries that follow.
#
# The body is supplied via a quoted here-document: both French names contain
# an apostrophe, and `\'` does not escape a quote inside a single-quoted shell
# string, so the previous inline argument was cut off at `l\`. Inside
# `<<'JSON'` the text is literal and the apostrophe needs no escaping.
# `--data-binary` is used (rather than `-d @-`) because the bulk body is
# newline-delimited and curl's `-d` strips newlines when reading from a file
# or stdin.
curl -XPOST "$ELASTICSEARCH_ENDPOINT/_bulk?refresh=true" --data-binary @- <<'JSON'
{"index":{"_index":"collections","_type":"collection","_id":6}}
{"name":{"en":"Western Europe antiquities","fr":"Antiquités de l'Europe occidentale"}}
{"index":{"_index":"collections","_type":"collection","_id":13}}
{"name":{"en":"Islamic art","fr":"Arts de l'Islam"}}
JSON
# Do searches

#######################################
# POST one `query_string` search, with "explain" enabled, to the
# pretty-printed /_search endpoint.
# Globals:   ELASTICSEARCH_ENDPOINT (read)
# Arguments: $1 - query text placed in "query"
#            $2 - analyzer name placed in "analyzer"
# Outputs:   whatever curl prints for the response
# Note:      both arguments are inserted into the JSON verbatim, so they must
#            not contain characters that need JSON escaping.
#######################################
run_query_string() {
  local query_text=$1
  local analyzer_name=$2

  curl -XPOST "$ELASTICSEARCH_ENDPOINT/_search?pretty" --data-binary @- <<JSON
{
  "query": {
    "query_string": {
      "query": "${query_text}",
      "analyzer": "${analyzer_name}"
    }
  },
  "explain": true
}
JSON
}

# Same five searches, in the same order, as separate (query, analyzer) pairs.
run_query_string 'Europe' 'snowball_fr'
run_query_string 'Europe' 'snowball'
run_query_string 'Europe' 'standard'
run_query_string 'name.en:Europe' 'snowball'
run_query_string 'Europe*' 'snowball'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Auto generated by Found's Play-tool at 2014-10-23T09:23:09+02:00 | |
version: 0 | |
title: "Why ES can't find Europe?" | |
description: "I can't understand why Elasticsearch can't find Europe with a snowball analyzer..." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample searches. Press Ctrl-Enter to run. | |
# Press Shift-Enter to go to focused mode for the selected editor. | |
# Click "Help" for more shortcuts. | |
query: | |
query_string: | |
query: Europe | |
analyzer: snowball_fr | |
explain: true | |
# -> Not found | |
--- | |
# This is search #2. | |
query: | |
query_string: | |
query: Art | |
analyzer: snowball | |
explain: true | |
# -> Not found | |
--- | |
# This is search #3. | |
query: | |
query_string: | |
query: Europe | |
analyzer: standard | |
explain: true | |
# -> Found | |
--- | |
# This is search #4. | |
query: | |
query_string: | |
query: "name.en:Europe" | |
analyzer: snowball | |
explain: true | |
# -> Found | |
--- | |
# This is search #5. | |
query: | |
query_string: | |
query: "Europe*" | |
analyzer: snowball | |
explain: true | |
# -> Found |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment