Created
April 11, 2019 09:44
-
-
Save DaRaFF/874f7b222b44a96eb4b444d315af5b36 to your computer and use it in GitHub Desktop.
Example of a better document search query for Livingdocs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const _ = require('lodash') | |
// Creates an Elasticsearch query | |
// | |
// Use multiplication to combine time and query score | |
// Add a minimum function_score for old documents, otherwise it would | |
// multiply with 0 after 1000 days of linear boost | |
// | |
// @param {String} searchQuery 'hello world' | |
// @returns {Object} Elasticsearch body.query | |
module.exports = function (searchQuery) { | |
if (_.isEmpty(searchQuery)) { return } | |
return { | |
bool: { | |
must: [ | |
{ | |
function_score: { | |
query: { | |
bool: { | |
should: getBoolQuery(searchQuery) | |
} | |
}, | |
functions: getFunctionsForScore(), | |
score_mode: 'max', | |
boost_mode: 'multiply' | |
} | |
} | |
] | |
} | |
} | |
} | |
// Builds the `should` clauses for a search term.
//
// Three kinds of input are distinguished, checked in this order:
//   1. digits only      -> exact match on document.id
//   2. "quoted phrase"  -> phrase prefix query on the text inside the quotes
//   3. anything else    -> phrase prefix, all-terms and any-term queries
//
// @param {String} searchQuery
// @returns {Array<Object>} Elasticsearch query clauses
const getBoolQuery = function (searchQuery) {
  // Classify and search with the trimmed term, so that surrounding whitespace
  // neither hides a quotation nor ends up in the generated queries.
  const term = searchQuery.trim()

  // exact match a document.id
  if (/^\d+$/.test(term)) {
    // Explicit radix: the id is always read as a decimal number.
    return [getExactMatchDocumentIdQuery(Number.parseInt(term, 10))]
  }

  // query has a quotation
  // An empty quotation ("") yields '' which is falsy, so it falls through to
  // the default queries below.
  const quotedPhrase = stripQuotationOrReturnUndefined(term)
  if (quotedPhrase) {
    return [getPhrasePrefixQuery(quotedPhrase)]
  }

  // default
  return [
    getPhrasePrefixQuery(term),
    getExactFieldsQuery(term),
    getFullTextQuery(term)
  ]
}
// Returns the `functions` list of the function_score query: three recency
// factors computed from `document.updated_at`. The caller in this file
// combines them with `score_mode: 'max'`, i.e. the largest factor is used.
//
// Going by the Elasticsearch decay function definitions (no `origin` is
// given; for date fields it defaults to now):
//   - gauss, weight 1:    full score for the first 7 days (offset), decayed
//                         to 0.5 another 14 days (scale) later
//   - linear, weight 0.1: falls in a straight line, 0.5 * 0.1 at 1000 days
//                         and 0 from 2000 days on
//   - weight 0.01:        constant floor, keeps the factor above 0 for
//                         arbitrarily old documents
//
// @returns {Array<Object>} a new array on every call
function getFunctionsForScore () {
  // Short-term boost for recently updated documents.
  const recentBoost = {
    gauss: {
      'document.updated_at': {
        scale: '14d',
        decay: 0.5,
        offset: '7d'
      }
    },
    weight: 1
  }

  // Long-term, slowly fading boost.
  const longTermBoost = {
    linear: {
      'document.updated_at': {
        scale: '1000d',
        decay: 0.5,
        offset: '0'
      }
    },
    weight: 0.1
  }

  // Minimum factor; without it the multiplication would zero out old documents.
  const minimumBoost = {
    weight: 0.01
  }

  return [recentBoost, longTermBoost, minimumBoost]
}
// Builds a `multi_match` query of type `phrase_prefix` over the document
// title (field boost 3) and the document html, with a query boost of 1.
//
// @param {String} searchQuery text to search for
// @returns {Object} Elasticsearch multi_match query
const getPhrasePrefixQuery = function (searchQuery) {
  const boost = 1.0
  const fieldsToQuery = ['document.title^3', 'document.html']

  return {
    multi_match: {
      boost,
      query: searchQuery,
      type: 'phrase_prefix',
      fields: fieldsToQuery
    }
  }
}
// Builds a heavily boosted (100) `multi_match` query of type `best_fields`
// with `operator: 'and'` over the document title (field boost 5) and the
// document html.
//
// @param {String} searchQuery text to search for
// @returns {Object} Elasticsearch multi_match query
const getExactFieldsQuery = function (searchQuery) {
  const boost = 100.0
  const fieldsToQuery = ['document.title^5', 'document.html']
  const tieBreaker = 0.3

  return {
    multi_match: {
      boost,
      query: searchQuery,
      type: 'best_fields', // take the score of the best field and ..
      fields: fieldsToQuery,
      tie_breaker: tieBreaker, // .. add 30% of the score of all other matching fields
      operator: 'and'
    }
  }
}
// Builds the fallback `multi_match` query of type `best_fields` with
// `operator: 'or'` and a query boost of 1 over the document title (field
// boost 5) and the document html.
//
// @param {String} searchQuery text to search for
// @returns {Object} Elasticsearch multi_match query
const getFullTextQuery = function (searchQuery) {
  const boost = 1.0
  const fieldsToQuery = ['document.title^5', 'document.html']
  const tieBreaker = 0.3

  return {
    multi_match: {
      boost,
      query: searchQuery,
      type: 'best_fields', // take the score of the best field and ..
      fields: fieldsToQuery,
      tie_breaker: tieBreaker, // .. add 30% of the score of all other matching fields
      operator: 'or'
    }
  }
}
// Builds a `match` query on `document.id` for the given id.
//
// @param {Number} documentId id to look up; passed through unchanged
// @returns {Object} Elasticsearch match query
const getExactMatchDocumentIdQuery = function (documentId) {
  const idClause = { query: documentId }

  return {
    match: {
      'document.id': idClause
    }
  }
}
// Returns the text between the quotes when the whole query, ignoring
// surrounding whitespace, is wrapped in double quotes:
//   '"hello world"' -> 'hello world'
//
// @param {String} searchQuery
// @returns {String|undefined} the unquoted text (may be ''), or undefined
//   when searchQuery is not a string or is not fully quoted
const stripQuotationOrReturnUndefined = function (searchQuery) {
  if (typeof searchQuery !== 'string') { return undefined }

  // No `g` flag: a single anchored match needs no lastIndex bookkeeping.
  const match = /^"(.*)"$/.exec(searchQuery.trim())
  return match ? match[1] : undefined
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment