Last active
July 4, 2024 09:28
-
-
Save dadoonet/5179ee72ecbf08f12f53d4bda1b76bab to your computer and use it in GitHub Desktop.
Demo script for "A NoSQL search engine to find..." talk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### REINIT
# Drop the "user" index left over from a previous run
# (this answers 404 when the index does not exist yet).
DELETE user

# Recreate it with two analyzed text fields.
PUT user
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "comments": {
        "type": "text"
      }
    }
  }
}
# Index the four sample users; ids are auto-generated.
POST user/_doc
{
  "name": "David Pilato",
  "comments": "Developer at elastic"
}

POST user/_doc
{
  "name": "Malloum Laya",
  "comments": "Developer, Worked with David at french customs service"
}

POST user/_doc
{
  "name": "David Gageot",
  "comments": "Engineer at Google"
}

POST user/_doc
{
  "name": "David David",
  "comments": "Who is that guy?"
}
### SEARCH
# Why the 1st doc?
GET user/_search
{
  "query": {
    "match": {
      "name": "David"
    }
  }
}

# Note that it works whatever the case
# (the query text is analyzed the same way as the "name" field).
GET user/_search
{
  "query": {
    "match": {
      "name": "DaVId"
    }
  }
}
# Search within 2 fields: why the 1st doc?
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "David",
      "fields": [ "name", "comments" ]
    }
  }
}

# Name is more important for us (raise the boost of "name" above 1)
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "David",
      "fields": [ "name^3", "comments" ]
    }
  }
}

# Name is more important for us (lower the boost of "comments" below 1)
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "David",
      "fields": [ "name", "comments^0.3" ]
    }
  }
}
# Typos
# "Dadid" is one edit away from "david"; without fuzziness only exact terms match.
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadid",
      "fields": [ "name", "comments^0.3" ]
    }
  }
}

# Same query, tolerating one edit per term.
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadid",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 1
    }
  }
}

# "Dadod" is two edits away from "david": one allowed edit is not enough.
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadod",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 1
    }
  }
}

# Same query, tolerating two edits per term.
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadod",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 2
    }
  }
}
# "auto" picks the allowed edit distance from the length of each term.
# Here 2 typos in a short term (5 characters) do not match.
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadod",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": "auto"
    }
  }
}

# Here 2 typos in a longer term do match.
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Devellopeur",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": "auto"
    }
  }
}
# Strange result?
# "match" combines the analyzed terms with OR by default,
# so a comment containing only "at" is returned as well.
GET user/_search
{
  "query": {
    "match": {
      "comments": "at elastic"
    }
  }
}

# Using a phrase: the terms must appear next to each other, in this order.
GET user/_search
{
  "query": {
    "match_phrase": {
      "comments": "at elastic"
    }
  }
}
## Playing with Synonyms
DELETE user

# Recreate the index with a custom analyzer on "comments":
# standard tokenizer, lowercase, then a synonym filter that rewrites
# both "engineer" and "developer" to "engineer".
PUT user
{
  "settings": {
    "analysis": {
      "analyzer": {
        "for_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "synonym_filter"
          ]
        }
      },
      "filter": {
        "synonym_filter": {
          "type": "synonym",
          "synonyms": [
            "engineer, developer => engineer"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "comments": {
        "type": "text",
        "analyzer": "for_synonyms"
      }
    }
  }
}
# Inspect the tokens produced by the "for_synonyms" analyzer.
GET user/_analyze
{
  "analyzer": "for_synonyms",
  "text": "David is engineer"
}

# "Developer" goes through the same synonym rule.
GET user/_analyze
{
  "analyzer": "for_synonyms",
  "text": "David is Developer"
}

# Same text, with the detailed output of each analysis step.
GET user/_analyze
{
  "explain": true,
  "analyzer": "for_synonyms",
  "text": "David is Developer"
}
# Index the same four sample users into the recreated index.
POST user/_doc
{
  "name": "David Pilato",
  "comments": "Developer at elastic"
}

POST user/_doc
{
  "name": "Malloum Laya",
  "comments": "Developer, Worked with David at french customs service"
}

POST user/_doc
{
  "name": "David Gageot",
  "comments": "Engineer at Google"
}

POST user/_doc
{
  "name": "David David",
  "comments": "Who is that guy?"
}
# "comments" is analyzed with "for_synonyms", so the two searches below
# go through the same synonym rule as the indexed text.
GET user/_search
{
  "query": {
    "match": {
      "comments": "developer"
    }
  }
}

GET user/_search
{
  "query": {
    "match": {
      "comments": "engineer"
    }
  }
}
### Control the relevancy (score)
DELETE user

# This time "comments" keeps the default analyzer and the synonym analyzer
# is only applied to the "comments.synonyms" sub-field, so each behaviour
# can be queried (and boosted) separately.
PUT user
{
  "settings": {
    "analysis": {
      "analyzer": {
        "for_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "synonym_filter"
          ]
        }
      },
      "filter": {
        "synonym_filter": {
          "type": "synonym",
          "synonyms": [
            "engineer, developer => engineer"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "comments": {
        "type": "text",
        "fields": {
          "synonyms": {
            "type": "text",
            "analyzer": "for_synonyms"
          }
        }
      }
    }
  }
}
# Index the same four sample users into the recreated index.
POST user/_doc
{
  "name": "David Pilato",
  "comments": "Developer at elastic"
}

POST user/_doc
{
  "name": "Malloum Laya",
  "comments": "Developer, Worked with David at french customs service"
}

POST user/_doc
{
  "name": "David Gageot",
  "comments": "Engineer at Google"
}

POST user/_doc
{
  "name": "David David",
  "comments": "Who is that guy?"
}
# search without synonyms
GET user/_search
{
  "query": {
    "match": {
      "comments": "engineer"
    }
  }
}

# search using synonyms
GET user/_search
{
  "query": {
    "match": {
      "comments.synonyms": "engineer"
    }
  }
}

# remember our fuzzy search
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "gougle",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 2
    }
  }
}

# remember our phrase search
GET user/_search
{
  "query": {
    "match_phrase": {
      "comments": "at elastic"
    }
  }
}
## Let's combine all that
# One "should" clause per matching strategy, from the strictest (phrase on
# name, boost 8.0) to the loosest (fuzzy on comments, boost 0.2).
# Each clause carries a "_name" so that it can be identified by name.
GET user/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match_phrase": {
            "name": {
              "query": "david pilato",
              "_name": "phrase on name",
              "boost": 8.0
            }
          }
        },
        {
          "match": {
            "name": {
              "query": "david pilato",
              "operator": "and",
              "_name": "all terms on name",
              "boost": 2.0
            }
          }
        },
        {
          "match": {
            "name": {
              "query": "david pilato",
              "operator": "or",
              "_name": "at least one term on name",
              "boost": 1.8
            }
          }
        },
        {
          "match": {
            "name": {
              "query": "david pilato",
              "fuzziness": 2,
              "_name": "fuzzy on name",
              "boost": 1.0
            }
          }
        },
        {
          "match_phrase": {
            "comments": {
              "query": "david pilato",
              "_name": "phrase on comments",
              "boost": 0.9
            }
          }
        },
        {
          "match": {
            "comments": {
              "query": "david pilato",
              "operator": "and",
              "_name": "all terms on comments",
              "boost": 0.8
            }
          }
        },
        {
          "match": {
            "comments": {
              "query": "david pilato",
              "operator": "or",
              "_name": "at least one term on comments",
              "boost": 0.7
            }
          }
        },
        {
          "match": {
            "comments.synonyms": {
              "query": "david pilato",
              "_name": "synonyms on comments",
              "boost": 0.6
            }
          }
        },
        {
          "match": {
            "comments": {
              "query": "david pilato",
              "fuzziness": 2,
              "_name": "fuzzy on comments",
              "boost": 0.2
            }
          }
        }
      ]
    }
  }
}
# Changing the user input can be hard...
# Same query as before with "polito" instead of "pilato":
# the text has to be replaced in every single clause.
GET user/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match_phrase": {
            "name": {
              "query": "david polito",
              "_name": "phrase on name",
              "boost": 8.0
            }
          }
        },
        {
          "match": {
            "name": {
              "query": "david polito",
              "operator": "and",
              "_name": "all terms on name",
              "boost": 2.0
            }
          }
        },
        {
          "match": {
            "name": {
              "query": "david polito",
              "operator": "or",
              "_name": "at least one term on name",
              "boost": 1.8
            }
          }
        },
        {
          "match": {
            "name": {
              "query": "david polito",
              "fuzziness": 2,
              "_name": "fuzzy on name",
              "boost": 1.0
            }
          }
        },
        {
          "match_phrase": {
            "comments": {
              "query": "david polito",
              "_name": "phrase on comments",
              "boost": 0.9
            }
          }
        },
        {
          "match": {
            "comments": {
              "query": "david polito",
              "operator": "and",
              "_name": "all terms on comments",
              "boost": 0.8
            }
          }
        },
        {
          "match": {
            "comments": {
              "query": "david polito",
              "operator": "or",
              "_name": "at least one term on comments",
              "boost": 0.7
            }
          }
        },
        {
          "match": {
            "comments.synonyms": {
              "query": "david polito",
              "_name": "synonyms on comments",
              "boost": 0.6
            }
          }
        },
        {
          "match": {
            "comments": {
              "query": "david polito",
              "fuzziness": 2,
              "_name": "fuzzy on comments",
              "boost": 0.2
            }
          }
        }
      ]
    }
  }
}
### Register a search template
# Store the combined query as a mustache template with the id "user".
# The user input is the single "query_string" parameter; it is also
# appended to every "_name" so the matched clause shows the text it ran with.
POST _scripts/user
{
  "script": {
    "lang": "mustache",
    "source": {
      "query": {
        "bool": {
          "should": [
            {
              "match_phrase": {
                "name": {
                  "query": "{{query_string}}",
                  "_name": "phrase on name - {{query_string}}",
                  "boost": 8
                }
              }
            },
            {
              "match": {
                "name": {
                  "query": "{{query_string}}",
                  "operator": "and",
                  "_name": "all terms on name - {{query_string}}",
                  "boost": 2
                }
              }
            },
            {
              "match": {
                "name": {
                  "query": "{{query_string}}",
                  "operator": "or",
                  "_name": "at least one term on name - {{query_string}}",
                  "boost": 1.8
                }
              }
            },
            {
              "match": {
                "name": {
                  "query": "{{query_string}}",
                  "fuzziness": 2,
                  "_name": "fuzzy on name - {{query_string}}",
                  "boost": 1
                }
              }
            },
            {
              "match_phrase": {
                "comments": {
                  "query": "{{query_string}}",
                  "_name": "phrase on comments - {{query_string}}",
                  "boost": 0.9
                }
              }
            },
            {
              "match": {
                "comments": {
                  "query": "{{query_string}}",
                  "operator": "and",
                  "_name": "all terms on comments - {{query_string}}",
                  "boost": 0.8
                }
              }
            },
            {
              "match": {
                "comments": {
                  "query": "{{query_string}}",
                  "operator": "or",
                  "_name": "at least one term on comments - {{query_string}}",
                  "boost": 0.7
                }
              }
            },
            {
              "match": {
                "comments.synonyms": {
                  "query": "{{query_string}}",
                  "_name": "synonyms on comments - {{query_string}}",
                  "boost": 0.6
                }
              }
            },
            {
              "match": {
                "comments": {
                  "query": "{{query_string}}",
                  "fuzziness": 2,
                  "_name": "fuzzy on comments - {{query_string}}",
                  "boost": 0.2
                }
              }
            }
          ]
        }
      }
    }
  }
}
# Run the stored "user" template with different user inputs.
# The requests target the "user" index, like every other search in this
# script, instead of searching all indices of the cluster.
GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "david"
  }
}

GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "david pilato"
  }
}

# Typos in both terms.
GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "dovad polito"
  }
}

# Typo in the last name only.
GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "david polito"
  }
}

GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "david engineer at google"
  }
}

# "developer" instead of "engineer": exercises the synonyms clause.
GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "david developer at google"
  }
}

GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "at elastic"
  }
}

GET user/_search/template
{
  "id": "user",
  "params": {
    "query_string": "elastic customs david"
  }
}
# Open https://gist.github.com/dadoonet/5179ee72ecbf08f12f53d4bda1b76bab
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
great job brother