Last active
August 29, 2015 14:00
-
-
Save reedobrien/6c2455f8f82ca9b6b798 to your computer and use it in GitHub Desktop.
multifield indexing?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Example from http://bit.ly/1m7DFKs
##
## Creates `my_index` with a custom `trigrams` analyzer (standard tokenizer,
## lowercase, 3-character ngrams), maps the `text` field of `my_type` to it,
## bulk-indexes five German compound words and finally searches for "Adler".

# Drop any index left over from a previous run.
curl -XDELETE 'localhost:9200/my_index'
echo

# Create the index: analysis settings plus the mapping that uses them.
curl -XPUT 'localhost:9200/my_index' -d'
{
  "settings": {
    "analysis": {
      "filter": {
        "trigrams_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 3
        }
      },
      "analyzer": {
        "trigrams": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "trigrams_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "trigrams"
        }
      }
    }
  }
}'
echo

# Bulk-index the sample documents.
# The bulk body is newline-delimited and its LAST line must also end with a
# newline, so the closing quote deliberately sits on a line of its own.
curl -XPOST 'localhost:9200/my_index/my_type/_bulk' -d'
{ "index": { "_id": 1 }}
{ "text": "Aussprachewörterbuch" }
{ "index": { "_id": 2 }}
{ "text": "Militärgeschichte" }
{ "index": { "_id": 3 }}
{ "text": "Weißkopfseeadler" }
{ "index": { "_id": 4 }}
{ "text": "Weltgesundheitsorganisation" }
{ "index": { "_id": 5 }}
{ "text": "Rindfleischetikettierungsüberwachungsaufgabenübertragungsgesetz" }
'
echo

# Make the freshly indexed documents visible to search; without an explicit
# refresh the query below can race the index's periodic refresh.
curl -XPOST 'localhost:9200/my_index/_refresh'
echo

# Search the trigram-analyzed field.
curl -XGET 'localhost:9200/my_index/my_type/_search' -d'
{
  "query": {
    "match": {
      "text": "Adler"
    }
  }
}'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the `tc` index with a trigram analyzer on the `name` field of the
# `charm` and `bundle` types, indexes five charm names and searches for "crm".

# Drop any index left over from a previous run.
curl -XDELETE 'localhost:9200/tc/'
# printf instead of echo: bash's builtin echo prints "\n" literally.
printf '\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^Deleted \n'

# Create the index.
# "mappings" must be a top-level sibling of "settings". It was previously
# nested inside "settings", so the trigrams analyzer was never attached to
# `name` and substring searches such as "crm" could not match.
curl -XPUT 'localhost:9200/tc/' -d'
{
  "settings": {
    "analysis": {
      "filter": {
        "trigrams_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 3
        }
      },
      "analyzer": {
        "trigrams": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "trigrams_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "charm": {
      "properties": {
        "name": {
          "type": "string",
          "analyzer": "trigrams"
        }
      }
    },
    "bundle": {
      "properties": {
        "name": {
          "type": "string",
          "analyzer": "trigrams"
        }
      }
    }
  }
}'
printf '\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^Put mapping\n'

# Index the sample charms, one document per request.
curl -XPUT 'localhost:9200/tc/charm/1' -d '{
  "name": "sugarcrm"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/2' -d '{
  "name": "couchbase"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/3' -d '{
  "name": "mysql"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/4' -d '{
  "name": "cf-mysql"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/5' -d '{
  "name": "PostgreSQL"
}'
echo

# Make the freshly indexed documents visible to search; without an explicit
# refresh the queries below can race the index's periodic refresh.
curl -XPOST 'localhost:9200/tc/_refresh'
echo

# Same search twice: once as a URI query string, once as a request body.
curl 'http://localhost:9200/tc/_search?q=name:crm'
echo
curl -XPOST 'localhost:9200/tc/_search' -d'
{
  "query": {
    "match": {
      "name": "crm"
    }
  }
}'
echo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Delete the `charms` index
DELETE /charms

# Map the `name` field as a multi_field with two sub-fields:
#   - `ngrams`: analyzed into lowercase ngrams of length 3 to 20
#   - `name`:   stored as a single not_analyzed term (exact match only)
PUT /charms
{
  "settings": {
    "analysis": {
      "filter": {
        "3-20grams_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 20
        }
      },
      "analyzer": {
        "3-20grams": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "3-20grams_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "charm": {
      "properties": {
        "name": {
          "type": "multi_field",
          "fields": {
            "ngrams": {
              "type": "string",
              "analyzer": "3-20grams"
            },
            "name": {
              "type": "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}

# Test the analyzer (the name must match the one defined in the settings above)
GET /charms/_analyze?analyzer=3-20grams&text=sugarcrm

# Index some example docs
POST /charms/charm/_bulk
{"index":{"_id":1}}
{"name":"sugarcrm"}
{"index":{"_id":2}}
{"name":"couchbase"}
{"index":{"_id":3}}
{"name":"mysql"}
{"index":{"_id":4}}
{"name":"cf-mysql"}
{"index":{"_id":5}}
{"name":"PostgreSQL"}

# NOTE(review): the queries below address the sub-field by its short name
# `ngrams`; the full path would be `name.ngrams`. Confirm short-name lookup is
# supported by the Elasticsearch version in use.

# Hit
GET /charms/charm/_search?q=ngrams:crm

# 3 Hits
GET /charms/charm/_search?q=ngrams:mysql

# Hit
GET /charms/charm/_search?q=name:sugarcrm

# Miss
GET /charms/charm/_search?q=name:crm

# 3 Hits
GET /charms/charm/_search?q=ngrams:sql

# Hit
GET /charms/charm/_search?q=ngrams:ouch

# Hit: Simple search - matches trigram
POST /charms/charm/_search
{
  "query": {
    "match": {
      "ngrams": "crm"
    }
  }
}

# Miss: Simple exact search - misses trigram
POST /charms/charm/_search
{
  "query": {
    "match": {
      "name": "crm"
    }
  }
}

# Hit: Simple search - matches 3
POST /charms/charm/_search
{
  "query": {
    "match": {
      "ngrams": {
        "query": "sql"
      }
    }
  }
}

# Miss: Correct Miss
POST /charms/charm/_search
{
  "query": {
    "match": {
      "name": {
        "query": "sql"
      }
    }
  }
}

# Haven't figured out explain yet.
# In the long form of `match`, the text to search for goes under "query".
POST /charms/charm/_validate/query?explain
{
  "query": {
    "match": {
      "name": {
        "query": "crm"
      }
    }
  }
}

# Think we need longer names to demo trimming the long tail with minimum_should_match
POST /charms/charm/_search
{
  "query": {
    "match": {
      "ngrams": {
        "query": "ouc",
        "minimum_should_match": "80%"
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment