Created
March 18, 2021 10:56
-
-
Save spinscale/c3508e19bf4874581f70f44f84d28542 to your computer and use it in GitHub Desktop.
Elastic Bytes - rank/distance feature
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################### | |
## Boosting scores based on numeric field values ## | |
################################################### | |
# Existing solution: function_score query | |
# Problem: Performance penalty | |
# Workaround: rescoring, which only rescores the top-n documents | |
# Lucene optimization in Elasticsearch 7: Block-Max WAND | |
# New query: distance_feature, efficiently skips non-competitive hits | |
# Works with geo_point, date and date_nanos fields | |
# Request the cluster root endpoint (basic connectivity check) | |
GET / | |
# Create the hotels index. Only location (geo_point) and last_booked (date) are mapped explicitly; | |
# these are the two fields the distance_feature queries in this file operate on | |
PUT hotels | |
{ | |
"mappings": { | |
"properties": { | |
"location" : { "type" : "geo_point" }, | |
"last_booked" : { "type" : "date" } | |
} | |
} | |
} | |
# Index three sample hotels with explicit document IDs | |
# name and stars are not part of the explicit mapping of the hotels index | |
PUT hotels/_doc/motel-one-central | |
{ | |
"name" : "Motel One - Munich Sonnenstraße", | |
"last_booked" : "2021-03-02", | |
"stars": 5, | |
"location": { "lat" : 48.1345, "lon":11.5669 } | |
} | |
PUT hotels/_doc/motel-one-schwabing | |
{ | |
"name" : "Motel One - Munich Parkstadt Schwabing", | |
"last_booked" : "2021-01-01", | |
"stars": 4, | |
"location": { "lat" : 48.1778 , "lon":11.5919 } | |
} | |
# The last request passes ?refresh - presumably so the documents are searchable by the queries that follow | |
PUT hotels/_doc/ibis-west?refresh | |
{ | |
"name" : "Ibis Hotel/Motel - München Süd/West", | |
"last_booked" : "2020-12-15", | |
"stars": 2, | |
"location": { "lat" : 48.1347 , "lon":11.5241 } | |
} | |
# Search without a request body | |
GET hotels/_search | |
# Baseline: full-text match on the name field only, without any additional scoring clause | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
] | |
} | |
} | |
} | |
# Score low-budget hotels higher: | |
# the name match is required (must), the range clause on stars <= 3 is optional (should) | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
], | |
"should": [ | |
{ | |
"range": { | |
"stars": { | |
"lte": 3 | |
} | |
} | |
} | |
] | |
} | |
} | |
} | |
# Main station: 48.14, 11.5595 | |
# Boost hotels close to the main station: | |
# the distance_feature clause uses the main station coordinates as origin for the location field | |
# pivot: distance from the origin at which a hit receives half of the boost (see the distance_feature docs) | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
], | |
"should": [ | |
{ | |
"distance_feature": { | |
"field": "location", | |
"origin": { "lat" : 48.14, "lon" : 11.5595}, | |
"pivot": "1km" | |
} | |
} | |
] | |
} | |
} | |
} | |
# BMW World/Olympiapark: 48.1776, 11.5561 | |
# Same name match, with BMW World as the distance_feature origin and a pivot of 3km | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
], | |
"should": [ | |
{ | |
"distance_feature": { | |
"field": "location", | |
"origin": { "lat" : 48.1776, "lon" : 11.5561}, | |
"pivot": "3km" | |
} | |
} | |
] | |
} | |
} | |
} | |
# Airport location: 48.3539, 11.7882 | |
# Same name match, with the airport as the distance_feature origin and a pivot of 1km | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
], | |
"should": [ | |
{ | |
"distance_feature": { | |
"field": "location", | |
"origin": { "lat" : 48.3539, "lon" : 11.7882}, | |
"pivot": "1km" | |
} | |
} | |
] | |
} | |
} | |
} | |
# Score hotels higher that have been booked more recently: | |
# distance_feature on the date field last_booked, with the fixed origin date 2021-03-17 and a pivot of 30 days | |
# The must clause requires at least 2 stars instead of a name match | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"range": { | |
"stars": { | |
"gte": 2 | |
} | |
} | |
} | |
], | |
"should": [ | |
{ | |
"distance_feature": { | |
"field": "last_booked", | |
"origin": "2021-03-17", | |
"pivot": "30d" | |
} | |
} | |
] | |
} | |
} | |
} | |
# But how to boost by a numeric value such as a rating? | |
# distance_feature does not help here: it only supports geo_point, date and date_nanos fields | |
# Alternative: rank_feature | |
# Add a rating field of type rank_feature to the existing hotels mapping | |
POST hotels/_mapping | |
{ | |
"properties" : { | |
"rating" : { | |
"type" : "rank_feature" | |
} | |
} | |
} | |
# Retrieve the mapping of the hotels index | |
GET hotels/_mapping | |
# Set a rating on each hotel via partial document updates | |
POST hotels/_update/motel-one-central | |
{ | |
"doc" : { | |
"rating" : 5.0 | |
} | |
} | |
POST hotels/_update/motel-one-schwabing | |
{ | |
"doc" : { | |
"rating" : 3.8 | |
} | |
} | |
# The last update also replaces the name of ibis-west and passes ?refresh | |
POST hotels/_update/ibis-west?refresh | |
{ | |
"doc" : { | |
"rating" : 3.1, | |
"name" : "Ibis Motel - München West" | |
} | |
} | |
# Name match only, run again after the updates: the first two hits get the same score | |
GET hotels/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
] | |
} | |
} | |
} | |
# Add an optional rank_feature clause on the rating field as an extra scoring signal, | |
# and set explain to true to request a score explanation for each hit | |
GET hotels/_search | |
{ | |
"explain": true, | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"name": "motel" | |
} | |
} | |
], | |
"should": [ | |
{ | |
"rank_feature": { | |
"field": "rating" | |
} | |
} | |
] | |
} | |
} | |
} | |
# Difference between rank_feature and distance_feature: | |
# rank_feature scores by a value stored in the document, distance_feature by the distance to an origin supplied with the query | |
# Both skip non-competitive hits (the biggest difference to function_score) | |
# More info: https://www.elastic.co/guide/en/elasticsearch/reference/7.11/query-dsl-distance-feature-query.html | |
# More info: https://www.elastic.co/guide/en/elasticsearch/reference/7.11/query-dsl-rank-feature-query.html | |
# https://www.elastic.co/blog/easier-relevance-tuning-elasticsearch-7-0 | |
# https://www.elastic.co/blog/distance-feature-query-time-and-geo-in-elasticsearch-result-ranking | |
# https://www.elastic.co/blog/faster-retrieval-of-top-hits-in-elasticsearch-with-block-max-wand | |
# Kahoot: https://kahoot.it/challenge/09692849 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment