Skip to content

Instantly share code, notes, and snippets.

@dadoonet
Last active July 4, 2024 09:28
Show Gist options
  • Save dadoonet/5179ee72ecbf08f12f53d4bda1b76bab to your computer and use it in GitHub Desktop.
Save dadoonet/5179ee72ecbf08f12f53d4bda1b76bab to your computer and use it in GitHub Desktop.
Demo script for "A NoSQL search engine to find..." talk
### REINIT
# Drop the "user" index and recreate it with two full-text fields,
# "name" and "comments".
DELETE user
PUT user
{
  "mappings": {
    "properties": {
      "name":     { "type": "text" },
      "comments": { "type": "text" }
    }
  }
}

# Index four sample user documents.
POST user/_doc
{ "name": "David Pilato", "comments": "Developer at elastic" }

POST user/_doc
{ "name": "Malloum Laya", "comments": "Developer, Worked with David at french customs service" }

POST user/_doc
{ "name": "David Gageot", "comments": "Engineer at Google" }

POST user/_doc
{ "name": "David David", "comments": "Who is that guy?" }
### SEARCH
# Why is this document ranked first?
GET user/_search
{
  "query": {
    "match": { "name": "David" }
  }
}

# Note that it works whatever the case of the query text
GET user/_search
{
  "query": {
    "match": { "name": "DaVId" }
  }
}

# Search within 2 fields: why is this document ranked first?
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "David",
      "fields": [ "name", "comments" ]
    }
  }
}

# Name is more important for us (boost the name field)
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "David",
      "fields": [ "name^3", "comments" ]
    }
  }
}

# Name is more important for us (alternative: down-weight comments with a boost below 1)
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "David",
      "fields": [ "name", "comments^0.3" ]
    }
  }
}

# Typos
# "Dadid" (one typo), no fuzziness
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadid",
      "fields": [ "name", "comments^0.3" ]
    }
  }
}

# "Dadid" (one typo), fuzziness 1
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadid",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 1
    }
  }
}

# "Dadod" (two typos), fuzziness 1
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadod",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 1
    }
  }
}

# "Dadod" (two typos), fuzziness 2
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadod",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 2
    }
  }
}

# "auto" automatically adjusts the fuzziness depending on the length of the term.
# Here, 2 typos in a short term do not match
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Dadod",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": "auto"
    }
  }
}

# Here, 2 typos in a longer term do match
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "Devellopeur",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": "auto"
    }
  }
}

# Strange result?
GET user/_search
{
  "query": {
    "match": { "comments": "at elastic" }
  }
}

# Using a phrase query instead
GET user/_search
{
  "query": {
    "match_phrase": { "comments": "at elastic" }
  }
}
## Playing with Synonyms
# Recreate the index with a custom analyzer ("for_synonyms") whose synonym
# filter rewrites both "engineer" and "developer" to "engineer".
# The analyzer is applied to the "comments" field.
DELETE user
PUT user
{
  "settings": {
    "analysis": {
      "analyzer": {
        "for_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [ "lowercase", "synonym_filter" ]
        }
      },
      "filter": {
        "synonym_filter": {
          "type": "synonym",
          "synonyms": [ "engineer, developer => engineer" ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name":     { "type": "text" },
      "comments": { "type": "text", "analyzer": "for_synonyms" }
    }
  }
}

# Check the tokens the analyzer produces for "engineer"...
GET user/_analyze
{ "analyzer": "for_synonyms", "text": "David is engineer" }

# ...and for "Developer"
GET user/_analyze
{ "analyzer": "for_synonyms", "text": "David is Developer" }

# Same text again, with "explain" turned on for a more detailed output
GET user/_analyze
{ "explain": true, "analyzer": "for_synonyms", "text": "David is Developer" }

# Re-index the four sample users
POST user/_doc
{ "name": "David Pilato", "comments": "Developer at elastic" }

POST user/_doc
{ "name": "Malloum Laya", "comments": "Developer, Worked with David at french customs service" }

POST user/_doc
{ "name": "David Gageot", "comments": "Engineer at Google" }

POST user/_doc
{ "name": "David David", "comments": "Who is that guy?" }

# Search the comments for "developer"
GET user/_search
{
  "query": {
    "match": { "comments": "developer" }
  }
}

# Search the comments for "engineer"
GET user/_search
{
  "query": {
    "match": { "comments": "engineer" }
  }
}
### Control the relevancy (score)
# Recreate the index once more. This time "comments" keeps its default
# analysis and the synonym analyzer is only applied to the
# "comments.synonyms" sub-field.
DELETE user
PUT user
{
  "settings": {
    "analysis": {
      "analyzer": {
        "for_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [ "lowercase", "synonym_filter" ]
        }
      },
      "filter": {
        "synonym_filter": {
          "type": "synonym",
          "synonyms": [ "engineer, developer => engineer" ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": { "type": "text" },
      "comments": {
        "type": "text",
        "fields": {
          "synonyms": { "type": "text", "analyzer": "for_synonyms" }
        }
      }
    }
  }
}

# Re-index the four sample users
POST user/_doc
{ "name": "David Pilato", "comments": "Developer at elastic" }

POST user/_doc
{ "name": "Malloum Laya", "comments": "Developer, Worked with David at french customs service" }

POST user/_doc
{ "name": "David Gageot", "comments": "Engineer at Google" }

POST user/_doc
{ "name": "David David", "comments": "Who is that guy?" }

# Search without synonyms
GET user/_search
{
  "query": {
    "match": { "comments": "engineer" }
  }
}

# Search using synonyms
GET user/_search
{
  "query": {
    "match": { "comments.synonyms": "engineer" }
  }
}

# Remember our fuzzy search
GET user/_search
{
  "query": {
    "multi_match": {
      "query": "gougle",
      "fields": [ "name", "comments^0.3" ],
      "fuzziness": 2
    }
  }
}

# Remember our phrase search
GET user/_search
{
  "query": {
    "match_phrase": { "comments": "at elastic" }
  }
}
## Let's combine all that
# One bool query with nine "should" clauses: phrase, all-terms, any-term and
# fuzzy matching on "name", then phrase, all-terms, any-term, synonym and
# fuzzy matching on "comments". Each clause carries its own boost and is
# tagged with a "_name".
GET user/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match_phrase": {
            "name": { "query": "david pilato", "_name": "phrase on name", "boost": 8.0 }
          }
        },
        {
          "match": {
            "name": { "query": "david pilato", "operator": "and", "_name": "all terms on name", "boost": 2.0 }
          }
        },
        {
          "match": {
            "name": { "query": "david pilato", "operator": "or", "_name": "at least one term on name", "boost": 1.8 }
          }
        },
        {
          "match": {
            "name": { "query": "david pilato", "fuzziness": 2, "_name": "fuzzy on name", "boost": 1.0 }
          }
        },
        {
          "match_phrase": {
            "comments": { "query": "david pilato", "_name": "phrase on comments", "boost": 0.9 }
          }
        },
        {
          "match": {
            "comments": { "query": "david pilato", "operator": "and", "_name": "all terms on comments", "boost": 0.8 }
          }
        },
        {
          "match": {
            "comments": { "query": "david pilato", "operator": "or", "_name": "at least one term on comments", "boost": 0.7 }
          }
        },
        {
          "match": {
            "comments.synonyms": { "query": "david pilato", "_name": "synonyms on comments", "boost": 0.6 }
          }
        },
        {
          "match": {
            "comments": { "query": "david pilato", "fuzziness": 2, "_name": "fuzzy on comments", "boost": 0.2 }
          }
        }
      ]
    }
  }
}

# Changing the user input can be hard: the same text has to be replaced in every clause
GET user/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match_phrase": {
            "name": { "query": "david polito", "_name": "phrase on name", "boost": 8.0 }
          }
        },
        {
          "match": {
            "name": { "query": "david polito", "operator": "and", "_name": "all terms on name", "boost": 2.0 }
          }
        },
        {
          "match": {
            "name": { "query": "david polito", "operator": "or", "_name": "at least one term on name", "boost": 1.8 }
          }
        },
        {
          "match": {
            "name": { "query": "david polito", "fuzziness": 2, "_name": "fuzzy on name", "boost": 1.0 }
          }
        },
        {
          "match_phrase": {
            "comments": { "query": "david polito", "_name": "phrase on comments", "boost": 0.9 }
          }
        },
        {
          "match": {
            "comments": { "query": "david polito", "operator": "and", "_name": "all terms on comments", "boost": 0.8 }
          }
        },
        {
          "match": {
            "comments": { "query": "david polito", "operator": "or", "_name": "at least one term on comments", "boost": 0.7 }
          }
        },
        {
          "match": {
            "comments.synonyms": { "query": "david polito", "_name": "synonyms on comments", "boost": 0.6 }
          }
        },
        {
          "match": {
            "comments": { "query": "david polito", "fuzziness": 2, "_name": "fuzzy on comments", "boost": 0.2 }
          }
        }
      ]
    }
  }
}
### Register a search template
# Store the combined bool query as a mustache search template with the id
# "user". The user input is injected through the single "query_string"
# parameter, which is also appended to every clause's "_name".
POST _scripts/user
{
  "script": {
    "lang": "mustache",
    "source": {
      "query": {
        "bool": {
          "should": [
            {
              "match_phrase": {
                "name": { "query": "{{query_string}}", "_name": "phrase on name - {{query_string}}", "boost": 8 }
              }
            },
            {
              "match": {
                "name": { "query": "{{query_string}}", "operator": "and", "_name": "all terms on name - {{query_string}}", "boost": 2 }
              }
            },
            {
              "match": {
                "name": { "query": "{{query_string}}", "operator": "or", "_name": "at least one term on name - {{query_string}}", "boost": 1.8 }
              }
            },
            {
              "match": {
                "name": { "query": "{{query_string}}", "fuzziness": 2, "_name": "fuzzy on name - {{query_string}}", "boost": 1 }
              }
            },
            {
              "match_phrase": {
                "comments": { "query": "{{query_string}}", "_name": "phrase on comments - {{query_string}}", "boost": 0.9 }
              }
            },
            {
              "match": {
                "comments": { "query": "{{query_string}}", "operator": "and", "_name": "all terms on comments - {{query_string}}", "boost": 0.8 }
              }
            },
            {
              "match": {
                "comments": { "query": "{{query_string}}", "operator": "or", "_name": "at least one term on comments - {{query_string}}", "boost": 0.7 }
              }
            },
            {
              "match": {
                "comments.synonyms": { "query": "{{query_string}}", "_name": "synonyms on comments - {{query_string}}", "boost": 0.6 }
              }
            },
            {
              "match": {
                "comments": { "query": "{{query_string}}", "fuzziness": 2, "_name": "fuzzy on comments - {{query_string}}", "boost": 0.2 }
              }
            }
          ]
        }
      }
    }
  }
}

# Run the stored template with different user inputs.
# The calls are scoped to the "user" index, like every other search in this
# script, so that other indices in the cluster cannot pollute the results.
GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "david" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "david pilato" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "dovad polito" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "david polito" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "david engineer at google" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "david developer at google" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "at elastic" }
}

GET user/_search/template
{
  "id": "user",
  "params": { "query_string": "elastic customs david" }
}
# Open https://gist.github.com/dadoonet/5179ee72ecbf08f12f53d4bda1b76bab
@RutvikBodara
Copy link

great job brother

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment