Created
December 14, 2017 09:43
-
-
Save spinscale/0a77f04cab9ed6150c56057f1fd3fa2f to your computer and use it in GitHub Desktop.
Training Examples (Zurich)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### decompounder example: dictionary-based splitting of compound words

# Drop any earlier copy of the index so the PUT below can recreate it.
DELETE decompound

# Two custom analyzers share one dictionary_decompounder filter whose word
# list is "topf" and "schiff". The "_unique" analyzer additionally runs the
# "unique" filter. "name" uses the plain analyzer; its "name.unique"
# sub-field uses the unique one.
PUT decompound
{
  "settings": {
    "analysis": {
      "filter": {
        "my_decompound_filter": {
          "type": "dictionary_decompounder",
          "word_list": ["topf", "schiff"]
        }
      },
      "analyzer": {
        "my_decompound_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "my_decompound_filter"]
        },
        "my_decompound_analyzer_unique": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "my_decompound_filter", "unique"]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "my_decompound_analyzer",
          "fields": {
            "unique": {
              "type": "text",
              "analyzer": "my_decompound_analyzer_unique"
            }
          }
        }
      }
    }
  }
}

# Inspect the settings the index was created with.
GET decompound/_settings

# Analyze without naming an analyzer, tokenizer or filter.
POST decompound/_analyze
{
  "text": "blumentopf"
}

# Ad-hoc chain: same tokenizer and filters as my_decompound_analyzer,
# spelled out in the request; the input is mixed case on purpose.
POST decompound/_analyze
{
  "text": "blumentOPF",
  "tokenizer": "standard",
  "filter": ["lowercase", "my_decompound_filter"]
}

# Same idea, but referencing the named analyzer.
POST decompound/_analyze
{
  "text": "blumentopf",
  "analyzer": "my_decompound_analyzer"
}

# A compound containing the other dictionary word, "schiff".
POST decompound/_analyze
{
  "text": "dampfschiff",
  "analyzer": "my_decompound_analyzer"
}

# Repeated input, first without and then with the "unique" filter, to
# compare the two token streams.
POST decompound/_analyze
{
  "text": "blumentopf blumentopf blumentopf",
  "analyzer": "my_decompound_analyzer"
}

POST decompound/_analyze
{
  "text": "blumentopf blumentopf blumentopf",
  "analyzer": "my_decompound_analyzer_unique"
}

# Index one document whose name contains the compound.
PUT decompound/doc/1
{
  "name": "ein wunderschöner blumentopf"
}

# Search for the dictionary word alone, against the main field ...
GET decompound/doc/_search
{
  "query": {
    "match": { "name": "topf" }
  }
}

# ... and against the sub-field analyzed with the "unique" variant.
GET decompound/doc/_search
{
  "query": {
    "match": { "name.unique": "topf" }
  }
}

# See also: https://github.com/jprante/elasticsearch-analysis-decompound
# reverse token filter: suffix-based searching via a reversed sub-field

# Drop any earlier copy of the index so the PUT below can recreate it.
DELETE reverse

# "name" has no explicit analyzer; its "name.reverse" sub-field is
# analyzed with the custom "reverse" analyzer (standard tokenizer, then the
# "lowercase" and "reverse" filters).
PUT reverse
{
  "settings": {
    "analysis": {
      "analyzer": {
        "reverse": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "reverse"]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "reverse": {
              "type": "text",
              "analyzer": "reverse"
            }
          }
        }
      }
    }
  }
}

PUT reverse/doc/1
{
  "name": "blumentopf"
}

# Leading-wildcard variant: inefficient, inverted index traversal is tricky.
GET reverse/_search
{
  "query": {
    "wildcard": {
      "name": { "value": "*topf" }
    }
  }
}

# Same suffix search expressed as a prefix query on the reversed sub-field;
# the value "fpot" is "topf" written backwards.
GET reverse/_search
{
  "query": {
    "prefix": {
      "name.reverse": { "value": "fpot" }
    }
  }
}
# highlight with synonyms

# Drop any earlier copy of the index so the PUT below can recreate it.
DELETE synonyms

# Synonyms are applied at query time only: "name" is indexed with the
# "standard" analyzer and searched with query_analyzer_with_synonyms, which
# adds a synonym_graph filter.
#
# The multi-word synonym is spelled "bayerische motorenwerke" so that it
# agrees with the text of document 2 below; with the earlier "bayrische"
# spelling the multi-word side of the rule could never match that document.
PUT synonyms
{
  "settings": {
    "analysis": {
      "analyzer": {
        "query_analyzer_with_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "my_synonyms"
          ]
        }
      },
      "filter": {
        "my_synonyms": {
          "type": "synonym_graph",
          "synonyms": [
            "bmw, bayerische motorenwerke",
            "vw, volkswagen"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "standard",
          "search_analyzer": "query_analyzer_with_synonyms"
        }
      }
    }
  }
}

PUT synonyms/doc/1
{
  "name": "Volkswagen Golf"
}

# Single-word synonym: query "vw", document says "Volkswagen".
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "vw"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

# This document contains both sides of the bmw rule.
PUT synonyms/doc/2
{
  "name": "This is a BMW 320i, which is a nice car. BMW stands for Bayerische Motorenwerke"
}

# Query with the abbreviation, default highlighter.
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bmw"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

# Query with the multi-word form, first with the "plain" highlighter ...
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bayerische motorenwerke"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "type": "plain"
      }
    }
  }
}

# ... then with the "unified" highlighter, to compare the fragments.
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bayerische motorenwerke"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "type": "unified"
      }
    }
  }
}
# italian job: parent/child documents with the join field

# Drop any earlier copy of the index so the PUT below can recreate it.
DELETE join

# One relation: "company" is the parent of "preferred_company".
PUT join
{
  "mappings": {
    "doc": {
      "properties": {
        "my_join_field": {
          "type": "join",
          "relations": {
            "company": "preferred_company"
          }
        }
      }
    }
  }
}

# Two parent documents.
PUT join/doc/1
{
  "text": "Amazon Logistics GmbH",
  "my_join_field": {
    "name": "company"
  }
}

PUT join/doc/2
{
  "text": "Amazon fulfilment A/S",
  "my_join_field": {
    "name": "company"
  }
}

GET join/_search
{
  "query": {
    "match": {
      "text": "amazon"
    }
  }
}

# alex prefers the logistics gmbh
# The routing value equals the parent id given in "parent".
# NOTE(review): this child stores the company name under "company", while
# child 4 below stores it under "text", so a match on "text" cannot hit this
# document but can hit child 4. Confirm whether the difference is intended.
PUT join/doc/3?routing=1
{
  "type": "preferred_company",
  "company": "Amazon Logistics GmbH",
  "user": "alex",
  "my_join_field": {
    "name": "preferred_company",
    "parent": "1"
  }
}

PUT join/doc/4?routing=2
{
  "type": "preferred_company",
  "text": "Amazon fulfilment A/S",
  "user": "paul",
  "my_join_field": {
    "name": "preferred_company",
    "parent": "2"
  }
}

# Companies matching "amazon", with an optional has_child clause on
# alex's preference. The term filter restricts hits to parent documents.
# It targets "my_join_field" itself: the mapping above defines only that
# field, and there is no "my_join_field.name" sub-field to filter on.
GET join/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "text": "amazon"
          }
        }
      ],
      "filter": {
        "term": {
          "my_join_field": "company"
        }
      },
      "should": [
        {
          "has_child": {
            "type": "preferred_company",
            "query": {
              "match": {
                "user": "alex"
              }
            }
          }
        }
      ]
    }
  }
}

# Same shape for paul, without the parent-only filter and with inner_hits
# requested on the has_child clause.
GET join/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "text": "amazon"
          }
        }
      ],
      "should": [
        {
          "has_child": {
            "type": "preferred_company",
            "query": {
              "match": {
                "user": "paul"
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}
# Prerequisite for this section:
#   bin/elasticsearch-plugin install analysis-phonetic

# Drop any earlier copy of the index so the PUT below can recreate it.
DELETE phonetic_sample

# Two analyzers that differ only in their phonetic encoder: "metaphone" for
# my_analyzer and "koelnerphonetik" for koelner_analyzer. Both filters set
# "replace" to false. "name" is analyzed with my_analyzer and its
# "name.koelner" sub-field with koelner_analyzer.
PUT phonetic_sample
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "my_metaphone": {
            "type": "phonetic",
            "encoder": "metaphone",
            "replace": false
          },
          "koelner_metaphone": {
            "type": "phonetic",
            "encoder": "koelnerphonetik",
            "replace": false
          }
        },
        "analyzer": {
          "my_analyzer": {
            "tokenizer": "standard",
            "filter": ["standard", "lowercase", "my_metaphone"]
          },
          "koelner_analyzer": {
            "tokenizer": "standard",
            "filter": ["standard", "lowercase", "koelner_metaphone"]
          }
        }
      }
    }
  },
  "mappings": {
    "person": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "my_analyzer",
          "fields": {
            "koelner": {
              "type": "text",
              "analyzer": "koelner_analyzer"
            }
          }
        }
      }
    }
  }
}

# Look at the tokens each analyzer emits.
POST phonetic_sample/_analyze
{
  "analyzer": "my_analyzer",
  "text": "Joe Blocks"
}

POST phonetic_sample/_analyze
{
  "analyzer": "koelner_analyzer",
  "text": "Aleksander"
}

# Four spellings of the same-sounding surname.
PUT phonetic_sample/person/1
{
  "name": "Peter Meyer"
}

PUT phonetic_sample/person/2
{
  "name": "Peter Meier"
}

PUT phonetic_sample/person/3
{
  "name": "Peter Maier"
}

PUT phonetic_sample/person/4
{
  "name": "Peter Mayer"
}

# Search one spelling through the metaphone-analyzed field ...
GET phonetic_sample/_search
{
  "query": {
    "match": { "name": "Maier" }
  }
}

# ... and through the koelnerphonetik-analyzed sub-field.
GET phonetic_sample/_search
{
  "query": {
    "match": { "name.koelner": "Maier" }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment