Skip to content

Instantly share code, notes, and snippets.

@smirnoffs
Last active February 26, 2020 16:13
Show Gist options
  • Save smirnoffs/f113041fe50485410f78f90acc86616f to your computer and use it in GitHub Desktop.
Save smirnoffs/f113041fe50485410f78f90acc86616f to your computer and use it in GitHub Desktop.
Elasticsearch introduction
# Kibana version should match Elasticsearch version
# The root endpoint answers with basic information about the node/cluster
GET /
# Start from a clean state: drop indices that may be left over from an earlier run
# (services_v1 and resources are not created by any request below)
DELETE services_v1
DELETE services_v2
DELETE resources
DELETE services_catalog_v1
# Get the list of all indices
GET _cat/indices
# Same list with a header row (?v)
GET _cat/indices?v
# Create a new, empty index. No mapping has to be supplied up front
PUT services_catalog_v1

# Create an alias "services" that points to the new index
POST _aliases/
{
  "actions": [
    { "add": { "alias": "services", "index": "services_catalog_v1" } }
  ]
}

# The single-alias endpoint below does the same thing
PUT services_catalog_v1/_alias/services

# Inspect the result: the index and the alias that points to it
GET _cat/indices?v
GET _cat/aliases

# The alias and the index name can be used interchangeably
GET services
# is the same as
GET services_catalog_v1
# Index a first document.
# Neither a mapping nor an ID has to be supplied.
# A POST without an ID in the URL creates a new document every time
POST services/service
{
  "name": "Fast service",
  "description": "<em>The service is providing fast access to <b>anything good</b> you want<em>",
  "created": "2017-11-02",
  "response_time": { "gte": 0.001, "lte": 0.01 }
}

# A mapping was derived from the document automatically
GET services/
GET services/_mapping

# List the documents; note that the _id was auto-generated
GET services/_search
# When the ID is part of the URL, the first request creates the document
# and every later request to the same URL replaces it
POST services/service/20
{
  "name": "Slow service",
  "description": "<em>This service is good in returing a data slowly<em>"
}

# Fetch the document together with its metadata (index name, document type, version)
GET services/service/20
# Fetch only the document body
GET services/service/20/_source

# The data is reachable through the index name as well as through the alias
GET services_catalog_v1/service/_search
GET services/service/_search
GET services/_search
{
  "query": {
    "match_all": {}
  }
}
# Language-specific analyzers lowercase the text, remove stop words and stem the remaining words
GET _analyze
{
  "analyzer": "english",
  "text": "This service is GOOD in returing a data slowly"
}

GET _analyze
{
  "analyzer": "english",
  "text": "is not good"
}

GET _analyze
{
  "analyzer": "german",
  "text": "der Winter ist diesmal nicht eingetreten"
}
# Mappings
# Available types: text, date, numeric (integer, float, double, etc.), boolean,
# range (integer_range, float_range, date_range, etc.), geo-point, geo-shape,
# JSON object, nested JSON object.
# Arrays are not a separate type: any field can hold multiple values.
# In Painless script queries you therefore have to check the type of the value yourself.

# Create an index with an explicit mapping for the "service" document type
PUT services_v2
{
  "mappings": {
    "service": {
      "properties": {
        "description": { "type": "text", "analyzer": "english" },
        "created": { "type": "date" },
        "response_time": { "type": "float_range" }
      }
    }
  }
}
# Swap indices: move the "services" alias from the old index to the new one
# in a single _aliases request
POST /_aliases
{
  "actions": [
    {
      "remove": {
        "index": "services_catalog_v1",
        "alias": "services"
      }
    },
    {
      "add": {
        "index": "services_v2",
        "alias": "services"
      }
    }
  ]
}

# Compare the auto-generated mapping with the explicit one
GET services_catalog_v1/service/_mapping
GET services_v2/service/_mapping

# The alias now targets services_v2, which has no documents yet
GET services/_search

# Reindexing: copy the documents from the old index into the index behind the alias
POST /_reindex
{
  "source": { "index": "services_catalog_v1" },
  "dest": { "index": "services" }
}

POST services/_search

# Remove the old index
DELETE services_catalog_v1
# Creating documents
# A field value may be an array without any change to the mapping
POST services/service/22
{
  "name": ["Fast service"],
  "description": "This service is absolutely the best service comparing to all other services"
}

GET services/service/22

# Documents are immutable: they cannot be changed in place, only replaced.
# Posting to an existing ID replaces the whole document
POST services/service/20
{
  "name": "Slow service",
  "description": "<em>This service is good in returing a data slowly<em>"
}

GET services/service/20

# Delete the document ...
DELETE services/service/20

# ... and index it again. The partial update below only works on an existing
# document; sending it right after the DELETE fails because the document is missing
POST services/service/20
{
  "name": "Slow service",
  "description": "<em>This service is good in returing a data slowly<em>"
}

# Partial update: the fields under "doc" are merged into the stored document,
# "name" and "description" are kept
POST services/service/20/_update
{
  "doc": {
    "created": "1998-04-13",
    "response_time": {
      "gte": 0.5,
      "lte": 100
    }
  }
}
# Without a request body the search returns all service documents
GET services/service/_search

# Full-text "match" query. Also try it with "good", "is good", "is not good"
GET services/service/_search
{
  "query": {
    "match": { "description": "services is good" }
  }
}

# "match_phrase" additionally requires the terms to appear next to each other, in order
GET services/service/_search
{
  "query": {
    "match_phrase": { "description": "is good" }
  }
}

GET services/service/_search
{
  "query": {
    "match_phrase": { "description": "service is provided" }
  }
}

# Same text as a phrase query ...
GET services/service/_search
{
  "query": {
    "match_phrase": { "description": "good service" }
  }
}

# ... and as a plain match query, to compare the results
GET services/service/_search
{
  "query": {
    "match": { "description": "good service" }
  }
}
# Fuzzy search: matches indexed terms that are within a small edit distance of the given term
GET services/service/_search
{
  "query": {
    "fuzzy": { "description": "campari" }
  }
}

GET services/service/_search
{
  "query": {
    "fuzzy": { "description": "rest" }
  }
}
# Range query on the date field: everything created from 1918 until now
GET services/service/_search
{
  "query": {
    "range": {
      "created": {
        "gte": "1918-01-01",
        "lte": "now"
      }
    }
  }
}

# Narrower range: only documents created from 2018 until now
GET services/service/_search
{
  "query": {
    "range": {
      "created": {
        "gte": "2018-01-01",
        "lte": "now"
      }
    }
  }
}
# Combine a fuzzy query and a range query in a bool query,
# and ask for highlighted fragments of the description
GET services/service/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "fuzzy": { "description": "serpice" }
        },
        {
          "range": {
            "created": { "gte": "2000-01-01", "lte": "now" }
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": { "description": {} }
  }
}

# Customize highlighting: own opening/closing tags and a fragment size of 35
GET services/service/_search
{
  "query": {
    "match": { "description": "service is slow" }
  },
  "highlight": {
    "pre_tags": ["<b>"],
    "post_tags": ["</b>"],
    "fields": {
      "description": { "fragment_size": 35 }
    }
  }
}
# An analyzer is a pipeline: character filters -> one tokenizer -> token filters
# https://www.elastic.co/guide/en/elasticsearch/reference/5.4/analysis-lang-analyzer.html#english-analyzer
# The built-in english analyzer lowercases, removes stop words and stems the remaining words
GET _analyze
{
  "analyzer": "english",
  "text": "<h1>There are no good drivers</h1>"
}

# The same kind of pipeline assembled by hand:
# html_strip char filter removes the <h1> markup before the text is tokenized
# lowercase filter runs first: the stop filter is case-sensitive by default
#   (otherwise "There" is not recognised as a stop word) and the snowball
#   stemmer expects lowercased tokens
# stop filter removes stop words: is, not, are ...
# snowball filter stems words: drivers -> driver
GET _analyze
{
  "char_filter": [
    "html_strip"
  ],
  "tokenizer": "standard",
  "filter": ["lowercase", "stop", "snowball"],
  "text": "<h1>There are no GOOD drivers impossible revolution disable</h1>"
}
# Scoring
# The relevance score takes into account how often the search term occurs in a document
POST services/service/_search
{
  "query": {
    "match": { "description": "service" }
  }
}

# Same search with explain=true: every hit carries a breakdown of how its score was computed
POST services/service/_search?explain=true
{
  "query": {
    "match": { "description": "service" }
  }
}
# N-gram tokenizers (a trigram is an n-gram of length 3)
# ngram: emits the letter sequences of length 3 to 6 found inside each word
POST _analyze
{
  "tokenizer": {
    "type": "ngram",
    "min_gram": "3",
    "max_gram": "6",
    "token_chars": ["letter"]
  },
  "filter": ["lowercase"],
  "char_filter": ["html_strip"],
  "text": ["1. <b>McDonalds</b> is the most popular restaurant in the world."]
}

# edge_ngram: emits only the sequences of length 3 to 4 anchored at the start of each word
POST _analyze
{
  "tokenizer": {
    "type": "edge_ngram",
    "min_gram": "3",
    "max_gram": "4",
    "token_chars": ["letter"]
  },
  "filter": ["lowercase"],
  "char_filter": ["html_strip"],
  "text": ["1. <b>McDonalds</b> is the most popular restaurant in the world."]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment