Skip to content

Instantly share code, notes, and snippets.

@reedobrien
Last active August 29, 2015 14:00
Show Gist options
  • Save reedobrien/6c2455f8f82ca9b6b798 to your computer and use it in GitHub Desktop.
Save reedobrien/6c2455f8f82ca9b6b798 to your computer and use it in GitHub Desktop.
multifield indexing?
## Example from http://bit.ly/1m7DFKs
## Trigram demo: rebuild `my_index`, index five German compound words,
## then search the `text` field for the fragment "Adler".

# Drop any previous copy of the index so the script can be re-run.
curl -XDELETE 'localhost:9200/my_index'
echo
# Create the index with a custom `trigrams` analyzer (standard tokenizer,
# lowercase, then ngrams of exactly 3 characters) and use it for `text`.
curl -XPUT 'localhost:9200/my_index' -d'
{
  "settings": {
    "analysis": {
      "filter": {
        "trigrams_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 3
        }
      },
      "analyzer": {
        "trigrams": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "trigrams_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "trigrams"
        }
      }
    }
  }
}'
echo
# Bulk-index the sample documents. The bulk API expects every line,
# including the last one, to end with a newline, so the closing quote
# sits on its own line.
curl -XPOST 'localhost:9200/my_index/my_type/_bulk' -d'
{ "index": { "_id": 1 }}
{ "text": "Aussprachewörterbuch" }
{ "index": { "_id": 2 }}
{ "text": "Militärgeschichte" }
{ "index": { "_id": 3 }}
{ "text": "Weißkopfseeadler" }
{ "index": { "_id": 4 }}
{ "text": "Weltgesundheitsorganisation" }
{ "index": { "_id": 5 }}
{ "text": "Rindfleischetikettierungsüberwachungsaufgabenübertragungsgesetz" }
'
echo
# Freshly indexed documents only become searchable after a refresh;
# force one so the search below does not race the periodic refresh.
curl -XPOST 'localhost:9200/my_index/_refresh'
echo
# Search `text` for the word fragment "Adler".
curl -XGET 'localhost:9200/my_index/my_type/_search' -d'
{
  "query": {
    "match": {
      "text": "Adler"
    }
  }
}'
# Terminate the response line so later output starts on a fresh line.
echo
# Drop any previous copy of the `tc` index so the script can be re-run.
curl -XDELETE 'localhost:9200/tc/'
# printf instead of echo: bash's echo prints "\n" literally, while other
# shells expand it, so the banner looked different depending on the shell.
printf '\n%s\n' '^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^Deleted '
# Create `tc` with the `trigrams` analyzer and map `name` to it for both
# the `charm` and `bundle` types. "mappings" must be a sibling of
# "settings" at the top level of the body; nested inside "settings" it
# is not applied as a mapping.
curl -XPUT 'localhost:9200/tc/' -d'
{
  "settings": {
    "analysis": {
      "filter": {
        "trigrams_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 3
        }
      },
      "analyzer": {
        "trigrams": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "trigrams_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "charm": {
      "properties": {
        "name": {
          "type": "string",
          "analyzer": "trigrams"
        }
      }
    },
    "bundle": {
      "properties": {
        "name": {
          "type": "string",
          "analyzer": "trigrams"
        }
      }
    }
  }
}'
printf '\n%s\n' '^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^Put mapping'
# Index five sample `charm` documents into `tc`, one request per document.
curl -XPUT 'localhost:9200/tc/charm/1' -d '{
"name": "sugarcrm"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/2' -d '{
"name": "couchbase"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/3' -d '{
"name": "mysql"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/4' -d '{
"name": "cf-mysql"
}'
echo
curl -XPUT 'localhost:9200/tc/charm/5' -d '{
"name": "PostgreSQL"
}'
echo
# Freshly indexed documents only become searchable after a refresh;
# force one so the searches below see all five documents.
curl -XPOST 'localhost:9200/tc/_refresh'
echo
# Search `name` for "crm" twice: once as a URI query string, once as a
# match query in the request body.
curl 'http://localhost:9200/tc/_search?q=name:crm'
echo
curl -XPOST 'localhost:9200/tc/_search' -d'
{
  "query": {
    "match": {
      "name": "crm"
    }
  }
}'
echo
# Delete the `charms` index
DELETE /charms
# Create `charms` and map the `name` field as a multi_field with two
# sub-fields:
#   `ngrams` - analyzed with `3-20grams` (standard tokenizer, lowercase,
#              then ngrams of length 3 to 20)
#   `name`   - not_analyzed, i.e. indexed as the exact original value
PUT /charms
{
"settings": {
"analysis": {
"filter": {
"3-20grams_filter": {
"type": "ngram",
"min_gram": 3,
"max_gram": 20
}
},
"analyzer": {
"3-20grams": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"3-20grams_filter"
]
}
}
}
},
"mappings": {
"charm": {
"properties": {
"name": {
"type": "multi_field",
"fields":{
"ngrams": {
"type": "string",
"analyzer": "3-20grams"
},
"name":{
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
# Test the analyzer. The name must match the `3-20grams` analyzer defined
# in the `charms` index settings.
GET /charms/_analyze?analyzer=3-20grams&text=sugarcrm
# Index some example docs: five `charm` documents with ids 1-5, each
# given as an action line followed by its source line.
POST /charms/charm/_bulk
{"index":{"_id":1}}
{"name":"sugarcrm"}
{"index":{"_id":2}}
{"name":"couchbase"}
{"index":{"_id":3}}
{"name":"mysql"}
{"index":{"_id":4}}
{"name":"cf-mysql"}
{"index":{"_id":5}}
{"name":"PostgreSQL"}
# URI (query-string) searches. `ngrams` is the ngram-analyzed sub-field
# and `name` the not_analyzed one (see the mapping in PUT /charms).
# The Hit/Miss labels are the results recorded by the author.
# Hit
GET /charms/charm/_search?q=ngrams:crm
# 3 Hits
# NOTE(review): presumably the query text is ngrammed too, so the "sql"
# gram also matches PostgreSQL - confirm.
GET /charms/charm/_search?q=ngrams:mysql
# Hit (exact value of the not_analyzed field)
GET /charms/charm/_search?q=name:sugarcrm
# Miss ("crm" is only a fragment of the stored exact value)
GET /charms/charm/_search?q=name:crm
# 3 Hits
GET /charms/charm/_search?q=ngrams:sql
# Hit
GET /charms/charm/_search?q=ngrams:ouch
# Request-body searches using the match query, in short form
# ("field": "text") and long form ("field": {"query": "text"}).
# The Hit/Miss labels are the results recorded by the author.
# Hit: Simple search - matches trigram
POST /charms/charm/_search
{
"query": {
"match": {
"ngrams": "crm"
}
}
}
# Miss: Simple exact search - misses trigram
# (`name` is not_analyzed, so a fragment does not match)
POST /charms/charm/_search
{
"query": {
"match": {
"name": "crm"
}
}
}
# Hit: Simple search - matches 3
POST /charms/charm/_search
{
"query": {
"match": {
"ngrams": {
"query": "sql"
}
}
}
}
# Miss: Correct Miss
# (no document has the exact name "sql")
POST /charms/charm/_search
{
"query": {
"match": {
"name": {
"query": "sql"
}
}
}
}
# Haven't figured out explain yet.
# Validate a match query on `name` and ask for an explanation. In the
# long form of the match query the search text goes under the `query` key.
POST /charms/charm/_validate/query?explain
{
"query": {
"match": {
"name": {
"query": "crm"
}
}
}
}
# Think we need longer names to demo trimming the long tail with minimum_should_match
# Searches `ngrams` for "ouc" with minimum_should_match set to 80%.
# NOTE(review): "ouc" is only 3 characters, the minimum gram length, so it
# presumably yields a single term and the threshold has nothing to trim - confirm.
POST /charms/charm/_search
{
"query": {
"match": {
"ngrams": {
"query": "ouc",
"minimum_should_match": "80%"
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment