Last active
April 2, 2023 10:12
-
-
Save dacr/2265ba5e80cdaa12225e616dbd81948e to your computer and use it in GitHub Desktop.
example based elasticsearch training lab content for use within kibana developer console / published by https://github.com/dacr/code-examples-manager #bf08e514-f207-46df-9db9-c77dc60a350c/5771cf9a8152eceb95afbf04c029cbfab4d790c2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## summary : example based elasticsearch training lab content for use within kibana developer console | |
## keywords : cem, code-examples-manager, elasticsearch, configuration, lab, index-mapping, query | |
## publish : gist | |
## authors : David Crosson | |
## license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2) | |
## id : bf08e514-f207-46df-9db9-c77dc60a350c | |
## created-on : 2021-04-29T16:52:54Z | |
## managed-by : https://github.com/dacr/code-examples-manager | |
# =============================================================== | |
# DO NOT EXECUTE IF USING THE COMMON ELASTICSEARCH | |
DELETE _template/cem_mapping_template | |
DELETE cem-* | |
DELETE tmp-cem-* | |
DELETE tmp-exec-* | |
# INJECT DATA | |
# EXECUTE inserted-documents-default/feed.sh | |
# => CREATE A KIBANA INDEX PATTERN FOR : cem-*
# =============================================================== | |
# USE the basic index naming "cem-default" | |
# --------------------- EX1-1 | |
GET _cat/indices | |
# --------------------- EX1-2 | |
GET cem-default/_count | |
# --------------------- EX1-3 | |
GET cem-default/_search | |
# --------------------- EX1-4 | |
GET cem-default/_search?q='fractal mandelbrot' | |
#Lucene query string syntax : a AND b, a OR b, ... | |
# --------------------- EX1-5 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "fractal OR mandelbrot" | |
} | |
} | |
} | |
# same results & score as previous | |
# uses the query string "mini-language" : a AND b, a OR b, field:value, (), _exists_:field, "that", th*, th?t, ...
# --------------------- EX1-6 | |
GET cem-default/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "fractal|mandelbrot" | |
} | |
} | |
} | |
# same results & score as previous | |
# uses simple query string syntax : a+b, a|b, -b, "that", th*, () | |
# --------------------- EX1-7 | |
GET cem-default/_search | |
{ | |
"query": { | |
"match_all": { | |
} | |
} | |
} | |
# returns everything with full content | |
# --------------------- EX1-8 | |
GET cem-default/_search | |
{ | |
"_source": ["filename", "summary"], | |
"query": { | |
"match_all": { | |
} | |
} | |
} | |
# returns everything with content limited to given fields | |
# --------------------- EX1-9 | |
GET cem-default/_search | |
{ | |
"_source": ["summary"], | |
"query": { | |
"match": { | |
"summary": "caching operations" | |
} | |
} | |
} | |
# by default an OR is applied between the terms of the given text after it has been analyzed
# no dedicated syntax, no order | |
# but many parameters to change the match behavior : operator, analyzer, ... | |
# --------------------- EX1-10 | |
GET cem-default/_search | |
{ | |
"_source": ["summary"], | |
"query": { | |
"match_phrase": { | |
"summary": "get current user" | |
} | |
} | |
} | |
# match in sequence => must be the exact phrase | |
# we'll see how to improve that later
# --------------------- EX1-11 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "logging-tips" | |
} | |
} | |
} | |
# --------------------- EX1-12 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"fields": ["summary"], | |
"query": "logging-tips" | |
} | |
} | |
} | |
# --------------------- EX1-13 | |
GET cem-default/_search | |
{ | |
"_source": ["filename", "summary"], | |
"query": { | |
"query_string": { | |
"query": "logg*" | |
} | |
} | |
} | |
# --------------------- EX1-14 | |
GET cem-default/_search | |
{ | |
"_source": ["filename", "summary"], | |
"query": { | |
"simple_query_string": { | |
"query": "[email protected]" | |
} | |
} | |
} | |
# --------------------- EX1-15 | |
GET cem-default/_search | |
{ | |
"_source": ["filename", "summary"], | |
"query": { | |
"simple_query_string": { | |
"query": "\"[email protected]\"" | |
} | |
} | |
} | |
# --------------------- EX1-16 | |
GET cem-default/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "unsafeRun" | |
} | |
} | |
} | |
# returns NO results | |
# --------------------- EX1-17 | |
GET cem-default/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "Runtime.default.unsafeRun" | |
} | |
} | |
} | |
# returns many results with a high score | |
# because for the standard analyzer "Runtime.default.unsafeRun" is a unique word | |
# --------------------- EX1-18 | |
GET cem-default/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "2.6.13" | |
} | |
} | |
} | |
# =============================================================== | |
# Evaluating analyzers | |
# --------------------- EX2-1 | |
POST _analyze | |
{ | |
"analyzer": "standard", | |
"text": "I sold some stocks yesterday, people are sicks" | |
} | |
# --------------------- EX2-2 | |
POST _analyze | |
{ | |
"analyzer": "english", | |
"text": "I sold some stocks yesterday, people are sicks" | |
} | |
# --------------------- EX2-3 | |
POST _analyze | |
{ | |
"analyzer": "english", | |
"text": "I'm getting sick" | |
} | |
# --------------------- EX2-4 | |
POST _analyze | |
{ | |
"analyzer": "standard", | |
"text": "j'ai attrapé la grippe hier à cause de personnes contagieuses" | |
} | |
# --------------------- EX2-5 | |
POST _analyze | |
{ | |
"analyzer": "french", | |
"text": "j'ai attrapé la grippe hier à cause de personnes contagieuses" | |
} | |
# --------------------- EX2-6 | |
POST _analyze | |
{ | |
"analyzer": "standard", | |
"text": "truc.much there and then." | |
} | |
# --------------------- EX2-7 | |
POST _analyze | |
{ | |
"analyzer": "simple", | |
"text": "truc.much there and then." | |
} | |
# --------------------- EX2-8 | |
POST _analyze | |
{ | |
"analyzer": "standard", | |
"text": "[email protected]" | |
} | |
# --------------------- EX2-9 | |
POST _analyze | |
{ | |
"analyzer": "simple", | |
"text": "[email protected]" | |
} | |
# --------------------- EX2-10 | |
POST _analyze | |
{ | |
"analyzer": "whitespace", | |
"text": "[email protected]" | |
} | |
# =============================================================== | |
# Enhancing search and start more complex queries | |
# DO NOT EXECUTE IF USING THE COMMON ELASTICSEARCH | |
DELETE _template/cem_mapping_template | |
DELETE cem-* | |
# --------------------- EX3-1 | |
# Using the simple analyzer instead of the standard one | |
PUT cem-default | |
{ | |
"settings":{ | |
"index":{"number_of_replicas":0}, | |
"analysis":{ | |
"analyzer":{"default":{"type" : "simple"}} | |
} | |
} | |
} | |
# INJECT DATA | |
# EXECUTE inserted-documents-default/feed.sh | |
# --------------------- EX3-2 | |
GET _cat/indices | |
# now our index is green ! | |
# --------------------- EX3-3 | |
GET cem-default/_search | |
{ | |
"_source":["category","filename","summary"], | |
"query": { | |
"query_string": { | |
"query": "unsafeRun" | |
} | |
} | |
} | |
# NOW RETURNS MANY RESULTS | |
# --------------------- EX3-4 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "2.6.13" | |
} | |
} | |
} | |
# NOW RETURNS NO RESULTS | |
# --------------------- EX3-5 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "backoff" | |
} | |
} | |
} | |
# RETURNS no results because camelcase is not taken into account, of course
# --------------------- EX3-6 | |
GET cem-default/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "ExponentialBackoffRetry" | |
} | |
} | |
} | |
# --------------------- EX3-7 | |
GET cem-default/_search | |
{ | |
"_source":["filename","summary"], | |
"query": { | |
"query_string": { | |
"query": "scala" | |
} | |
} | |
} | |
# --------------------- EX3-8 | |
GET cem-default/_search | |
{ | |
"_source":["filename","summary"], | |
"query": { | |
"query_string": { | |
"query": "scala zio" | |
} | |
} | |
} | |
# implicit OR
# --------------------- EX3-9 | |
GET cem-default/_search | |
{ | |
"_source":["filename","summary"], | |
"query": { | |
"query_string": { | |
"query": "scala OR zio" | |
} | |
} | |
} | |
# --------------------- EX3-10 | |
GET cem-default/_search | |
{ | |
"_source":["filename","summary"], | |
"query": { | |
"query_string": { | |
"query": "scala AND zio" | |
} | |
} | |
} | |
# --------------------- EX3-11 | |
GET cem-default/_search | |
{ | |
"_source":["filename","summary"], | |
"query": { | |
"bool": { | |
"should": [ | |
{"match": {"keywords":"scala zio"}} | |
] | |
} | |
} | |
} | |
# --------------------- EX3-12 | |
GET cem-default/_search | |
{ | |
"_source":["filename","summary"], | |
"query": { | |
"bool": { | |
"must": [ | |
{"match": {"keywords":"scala"}}, | |
{"match": {"keywords":"zio"}} | |
] | |
} | |
} | |
} | |
# --------------------- EX3-13 | |
# The good way to give more importance to specific fields | |
GET cem-default/_search | |
{ | |
"query": { | |
"multi_match": { | |
"query": "get user", | |
"operator": "and", | |
"type": "most_fields", | |
"fields": [ | |
"keywords^1", | |
"summary^1", | |
"content^5" | |
] | |
} | |
} | |
} | |
# =============================================================== | |
# Fixing again the search capabilities | |
# Check the default generated mapping | |
GET cem-default/_mapping | |
# DO NOT EXECUTE IF USING THE COMMON ELASTICSEARCH | |
DELETE _template/cem_mapping_template | |
DELETE cem-* | |
# --------------------- EX4-1 | |
# Using the simple analyzer instead of the standard one | |
# and add support for camelcase
PUT cem-default | |
{ | |
"settings": { | |
"index": { | |
"number_of_replicas": 0, | |
"number_of_shards": 2 | |
}, | |
"analysis": { | |
"analyzer": { | |
"default": { | |
"type": "simple" | |
}, | |
"camelcase": { | |
"type" : "pattern", | |
"pattern" : | |
"([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])" | |
} | |
} | |
} | |
}, | |
"mappings": { | |
"properties": { | |
"summary": { | |
"type": "text", | |
"fields": { | |
"summary-english": { | |
"type": "text", | |
"analyzer": "english" | |
} | |
} | |
}, | |
"content": { | |
"type": "text", | |
"fields": { | |
"content-standard": { | |
"type": "text", | |
"analyzer": "standard" | |
}, | |
"content-english": { | |
"type": "text", | |
"analyzer": "english" | |
}, | |
"content-camelcase": { | |
"type": "text", | |
"analyzer": "camelcase", | |
"search_analyzer": "standard" | |
} | |
} | |
} | |
} | |
} | |
} | |
# INJECT DATA | |
# EXECUTE inserted-documents-default/feed.sh | |
# --------------------- EX4-2 | |
POST cem-default/_analyze | |
{ | |
"field": "content", | |
"text": "coolRaoul42, trucBidule, joe_doe" | |
} | |
# --------------------- EX4-3 | |
POST cem-default/_analyze | |
{ | |
"field": "content.content-camelcase", | |
"text": "coolRaoul42, trucBidule, joe_doe" | |
} | |
# --------------------- EX4-4 | |
POST cem-default/_analyze | |
{ | |
"field": "content.content-camelcase", | |
"text" : "import org.apache.curator.retry.{ExponentialBackoffRetry, RetryNTimes}" | |
} | |
# --------------------- EX4-5 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "unsafeRun" | |
} | |
} | |
} | |
# NOW RETURNS THE RIGHT NUMBER OF RESULTS | |
# ALSO BECAUSE search_analyzer has been set to "standard" on content-camelcase | |
# --------------------- EX4-6 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "unsafe" | |
} | |
} | |
} | |
# And of course we can now use just a part of camelcase word | |
# --------------------- EX4-7 | |
GET cem-default/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "2.6.13" | |
} | |
} | |
} | |
# NOW RETURNS THE RIGHT NUMBER OF RESULTS | |
# ALSO BECAUSE search_analyzer has been set to "standard" on content-camelcase | |
# --------------------- EX4-8 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "backoff" | |
} | |
} | |
} | |
# --------------------- EX4-9 | |
GET cem-default/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "exponential AND backoff" | |
} | |
} | |
} | |
# --------------------- EX4-10 | |
GET cem-default/_search | |
{ | |
"_source": ["content"], | |
"query": { | |
"query_string": { | |
"query": "\"async.AsyncCuratorFramework\"" | |
} | |
} | |
} | |
# FIXED: match_phrase_prefix expects the target field name as its key.
# The previous body used "query" as that key, so a field literally named
# "query" was searched and nothing could match.
# content.content-camelcase is targeted because its "camelcase" index analyzer
# is meant to split identifiers such as AsyncCuratorFramework into separate
# tokens, while its search analyzer ("standard") tokenizes the phrase below.
GET cem-default/_search
{
  "_source": ["content"],
  "query": {
    "match_phrase_prefix": {
      "content.content-camelcase": {
        "query": "async curator framework"
      }
    }
  }
}
# --------------------- EX4-11 | |
# IT IS POSSIBLE TO HIGHLIGHT THE MATCH ! | |
GET cem-default/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"match": { | |
"summary": "snippets user" | |
} | |
}, | |
"highlight" : { | |
"fields" : { | |
"summary": {"force_source" : true} | |
} | |
} | |
} | |
# =============================================================== | |
# Optimizing mapping and playing with tokens | |
# DO NOT EXECUTE IF USING THE COMMON ELASTICSEARCH | |
DELETE _template/cem_mapping_template | |
DELETE cem-* | |
# --------------------- EX5-1 | |
# EXECUTE PRIOR TO DATA INJECTION | |
PUT _template/cem_mapping_template | |
{ | |
"index_patterns": ["cem-*"], | |
"settings": { | |
"index": { | |
"number_of_replicas": 0 | |
}, | |
"analysis": { | |
"filter":{ | |
"english_stop": { | |
"type": "stop", | |
"stopwords": "_english_" | |
}, | |
"english_keywords": { | |
"type": "keyword_marker", | |
"keywords": ["example"] | |
}, | |
"english_stemmer": { | |
"type": "stemmer", | |
"language": "english" | |
}, | |
"english_possessive_stemmer": { | |
"type": "stemmer", | |
"language": "possessive_english" | |
}, | |
"english_synonym" : { | |
"type" : "synonym", | |
"synonyms_path" : "synonyms.txt" | |
} | |
}, | |
"analyzer": { | |
"default": { | |
"type": "simple" | |
}, | |
"camelcase": { | |
"type" : "pattern", | |
"pattern" : | |
"([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])" | |
}, | |
"rebuilt_english": { | |
"tokenizer": "standard", | |
"filter": [ | |
"english_synonym", | |
"english_possessive_stemmer", | |
"lowercase", | |
"asciifolding", | |
"english_stop", | |
"english_keywords", | |
"english_stemmer" | |
] | |
} | |
} | |
} | |
}, | |
"mappings": { | |
"properties": { | |
"category": { | |
"type": "text", | |
"fields": { | |
"keyword": { | |
"type": "keyword", | |
"ignore_above": 2000 | |
} | |
} | |
}, | |
"summary": { | |
"type": "text", | |
"analyzer": "rebuilt_english", | |
"fields": { | |
"summary-english": { | |
"type": "text", | |
"analyzer": "rebuilt_english", | |
"search_analyzer" : "rebuilt_english" | |
}, | |
"summary-standard": { | |
"type": "text", | |
"analyzer": "standard", | |
"search_analyzer" : "standard" | |
} | |
} | |
}, | |
"content": { | |
"type": "text", | |
"fields": { | |
"content-standard": { | |
"type": "text", | |
"analyzer": "standard" | |
}, | |
"content-english": { | |
"type": "text", | |
"analyzer": "rebuilt_english", | |
"search_analyzer" : "rebuilt_english" | |
}, | |
"content-camelcase": { | |
"type": "text", | |
"analyzer": "camelcase", | |
"search_analyzer": "standard" | |
} | |
} | |
}, | |
"file": { | |
"type": "text" | |
}, | |
"filename": { | |
"type": "text", | |
"fields": { | |
"keyword": { | |
"type": "keyword", | |
"ignore_above": 256 | |
} | |
} | |
}, | |
"id": { | |
"type": "text" | |
}, | |
"uuid": { | |
"type": "text" | |
}, | |
"keywords": { | |
"type": "text", | |
"fields": { | |
"keyword": { | |
"type": "keyword", | |
"ignore_above": 1000 | |
} | |
} | |
}, | |
"lastUpdated": { | |
"type": "date" | |
}, | |
"managedBy": { | |
"type": "text" | |
}, | |
"license": { | |
"type": "text", | |
"fields": { | |
"keyword": { | |
"type": "keyword", | |
"ignore_above": 256 | |
} | |
} | |
}, | |
"publish": { | |
"type": "text", | |
"fields": { | |
"keyword": { | |
"type": "keyword", | |
"ignore_above": 256 | |
} | |
} | |
}, | |
"authors": { | |
"type": "text", | |
"fields": { | |
"keyword": { | |
"type": "keyword", | |
"ignore_above": 256 | |
} | |
} | |
}, | |
"execution": { | |
"type": "text" | |
}, | |
"runWith": { | |
"type": "text" | |
} | |
} | |
} | |
} | |
# INJECT DATA (USE TIMED BASE INDEX NAME) | |
# EXECUTE inserted-documents-timed/feed.sh | |
# To check the applied mapping (automatically taken from the defined mapping template) | |
# --------------------- EX5-2 | |
GET _cat/indices | |
# --------------------- EX5-3 | |
GET cem-*/_count | |
# --------------------- EX5-4 | |
GET cem-*/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "zio AND (resource OR environment)" | |
} | |
} | |
} | |
# --------------------- EX5-5 | |
GET cem-*/_search | |
{ | |
"query": { | |
"simple_query_string": { | |
"query": "zio+(resource|environment)" | |
} | |
} | |
} | |
# --------------------- EX5-6 | |
GET cem-*/_search | |
{ | |
"_source": ["summary"], | |
"query": { | |
"match": { | |
"summary": { | |
"query":"get the current user", | |
"operator": "and" | |
} | |
} | |
} | |
} | |
# "the" is ignored now
# --------------------- EX5-7 | |
GET cem-*/_search | |
{ | |
"query": { | |
"match_phrase": { | |
"content": "cheat sheet" | |
} | |
} | |
} | |
# --------------------- EX5-8 | |
GET cem-*/_search?size=5 | |
{ | |
"_source": ["summary", "keywords"], | |
"query": { | |
"bool": { | |
"must": { | |
"match":{ "publish":"gist"} | |
}, | |
"filter": { | |
"term": { "keywords": "testable"} | |
}, | |
"must_not": [ | |
{"match": {"summary":{"query":"hello world"}}}, | |
{"match": {"keywords":"async"}} | |
] | |
} | |
} | |
} | |
# filters narrow the search to a subset of the data, allow caching and are ignored in the scoring
# --------------------- EX5-9 | |
# search for missing fields | |
POST /cem-*/_search | |
{ | |
"_source": ["filename","summary"], | |
"size": 20, | |
"query": { | |
"bool": { | |
"must_not": [ | |
{"exists": {"field": "execution"}} | |
] | |
} | |
} | |
} | |
# --------------------- EX5-10 | |
# Search within date ranges | |
POST cem-*/_search | |
{ | |
"query": { | |
"bool": { | |
"must":{"query_string": { "query": "vertx"} }, | |
"filter": {"range": { | |
"created_on": { | |
"gte": "2020-06-01", | |
"lte": "2020-12-31" | |
} | |
}} | |
} | |
} | |
} | |
# --------------------- EX5-11 | |
# Count documents created during the last year (range starts at "now-1y")
POST /cem-*/_count | |
{ | |
"query": { | |
"bool": { | |
"must":{"query_string": { "query": "elasticsearch"} }, | |
"filter": {"range": { | |
"created_on": { | |
"gte": "now-1y" | |
} | |
}} | |
} | |
} | |
} | |
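# TODO: check why no results are returned when filtering on last_updated (with zio)
# NOTE(review): the mapping template declares the date field as "lastUpdated", not "last_updated" - confirm the field name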
# --------------------- EX5-12 | |
# Natural language query attempt | |
GET cem-*/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"query_string": { | |
"fields": ["summary"], | |
"query": "getting the users" | |
} | |
} | |
} | |
# Remember : | |
POST cem-2021-5/_analyze | |
{ | |
"field":"summary", | |
"text": "getting the users" | |
} | |
# --------------------- EX5-13 | |
GET cem-*/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"query_string": { | |
"fields": ["summary"], | |
"query": "getting the users", | |
"default_operator": "AND" | |
} | |
} | |
} | |
# users => user, getting => get, "the" is ignored | |
# --------------------- EX5-14 | |
# fetch & get have been declared as synonyms (config/synonyms.txt)
GET cem-*/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"query_string": { | |
"fields": ["summary", "content"], | |
"query": "fetch the users", | |
"default_operator": "AND" | |
} | |
} | |
} | |
GET cem-*/_search | |
{ | |
"query": { | |
"query_string": { | |
"query": "synonyms" | |
} | |
} | |
} | |
# all synonyms have been added in that case check (config/synonyms.txt) | |
POST cem-2021-5/_analyze | |
{ | |
"field":"summary", | |
"text": "fetch the users" | |
} | |
# --------------------- EX5-15 | |
GET cem-*/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"query_string": { | |
"fields": ["summary"], | |
"query": "sheetcheat", | |
"default_operator": "AND" | |
} | |
} | |
} | |
# all synonyms have been mapped into a single token check (config/synonyms.txt) | |
POST cem-2021-5/_analyze | |
{ | |
"field":"summary", | |
"text": "sheetcheat" | |
} | |
# --------------------- EX5-16A | |
GET cem-*/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"query_string": { | |
"fields": ["summary"], | |
"query": "arango database", | |
"default_operator": "AND" | |
} | |
} | |
} | |
# --------------------- EX5-16B ?? => FIXED | |
# something going wrong here : issues, issue, unassigned | |
# After check the expected document wasn't containing the "issue" word !! | |
# TAKE care with multifields when specifying search fields | |
GET cem-*/_search | |
{ | |
"_source":["summary"], | |
"query": { | |
"query_string": { | |
"fields":["content"], | |
"query": "elastic issues unassigned", | |
"default_operator": "AND" | |
} | |
} | |
} | |
# NO RESULTS !!! | |
GET cem-*/_search | |
{ | |
"_source":["summary", "content"], | |
"query": { | |
"query_string": { | |
"fields":["content.content-english"], | |
"query": "elastic issues unassigned", | |
"default_operator": "AND" | |
} | |
} | |
} | |
GET cem-*/_search | |
{ | |
"_source":["summary", "content"], | |
"query": { | |
"query_string": { | |
"fields":["content.content-english"], | |
"query": "elastic issue unassign", | |
"default_operator": "AND" | |
} | |
} | |
} | |
POST cem-2021-5/_analyze | |
{ | |
"field":"content.content-english", | |
"text": "elastic issues unassigned" | |
} | |
# --------------------- EX5-17 | |
# all synonyms have been mapped into two tokens, check (config/synonyms.txt) | |
POST cem-2021-1/_analyze | |
{ | |
"field": "summary.summary-english", | |
"text": "arangodb" | |
} | |
# PLACE THE SYNONYMS FILTER IN FIRST POSITION ! | |
# --------------------- EX5-18 | |
# handle spelling mistakes... or typing errors
GET cem-*/_search | |
{ | |
"_source": ["summary"], | |
"query": { | |
"query_string": { | |
"query":"interation" | |
} | |
} | |
} | |
# fuzzy search to deal with 1 misspelled word
GET cem-*/_search | |
{ | |
"_source": ["summary"], | |
"query": { | |
"fuzzy": { | |
"summary.summary-standard": { | |
"value":"interation" | |
} | |
} | |
} | |
} | |
# interation => interactions ! | |
# --------------------- EX5-19 | |
# fuzzy search to deal with misspelled words
GET cem-*/_search | |
{ | |
"_source": ["summary"], | |
"size": 3, | |
"query": { | |
"match": { | |
"summary": { | |
"query": "got the uuseer", | |
"fuzziness": "AUTO" | |
} | |
} | |
} | |
} | |
# =============================================================== | |
# Advanced queries | |
# --------------------- EX6-1 | |
# How many distinct example keywords ? result stored in aggregations.the-count-for-me.value
POST /cem-*/_search?size=0 | |
{ | |
"aggs" : { | |
"the-count-for-me" : { | |
"cardinality" : { | |
"field" : "keywords.keyword" | |
} | |
} | |
} | |
} | |
# --------------------- EX6-2 | |
# Example keywords occurrences count / How many examples for each keyword ?
# Top10 of the most used keywords | |
POST /cem-*/_search | |
{ | |
"size":0, | |
"aggs" : { | |
"my-results" : { | |
"terms" : { | |
"field" : "keywords.keyword", | |
"size": 10 | |
} | |
} | |
} | |
} | |
# --------------------- EX6-3 | |
# How many examples in each category
POST /cem-*/_search | |
{ | |
"size":0, | |
"aggs" : { | |
"results" : { | |
"terms" : { | |
"field" : "category.keyword", | |
"size": 50 | |
} | |
} | |
} | |
} | |
# --------------------- EX6-4 | |
# Significant terms - categories with at least 5 documents having the async or http keyword
POST /cem-*/_search?size=0 | |
{ | |
"query": { | |
"terms": { | |
"keywords": [ | |
"async", "http" | |
] | |
} | |
}, | |
"aggregations": { | |
"significant_categories": { | |
"significant_terms": { | |
"field": "category.keyword", | |
"min_doc_count": 5 | |
} | |
} | |
} | |
} | |
# --------------------- EX6-5 | |
# COUNT AGGREGATE MONTHLY FOR A GIVEN TIME RANGE | |
GET /cem-*/_search | |
{ | |
"aggs": { | |
"my_results": { | |
"date_histogram": { | |
"field": "created_on", | |
"calendar_interval": "1M", | |
"time_zone": "Europe/Paris", | |
"min_doc_count": 1 | |
} | |
} | |
}, | |
"size": 0, | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"range": { | |
"created_on": { | |
"format": "strict_date_optional_time", | |
"gte": "2018-01-01T00:00:00.000Z", | |
"lte": "2021-12-31T23:59:59.999Z" | |
} | |
} | |
} | |
] | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment