Last active
April 27, 2022 19:15
-
-
Save literallylara/a04a05ceba27cd03dd43727d60ac9e11 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Performs a fuzzy search on a list of values (haystack) | |
* with the provided query (needle) and sorts each match | |
* based on its scoring. | |
* | |
* TODO: adjust scoring logic | |
* | |
* @author Lara Sophie Schütt (@literallylara) | |
* @license MIT | |
* | |
* @param {Array<string>} haystack | |
* List of values to check against | |
* @param {string} needle | |
* Query to look for | |
* @param {string} [options.pre='<'] | |
* String to be added ***before*** each match | |
* @param {string} [options.post='>'] | |
* String to be added ***after*** each match | |
* @param {boolean} [options.caseSensitive=false] | |
* Whether or not to consider the case of each value | |
* @returns {[...{ | |
* score: number; | |
* match: string; | |
* subject: string; | |
* }]} | |
* | |
* Filtered list of matches sorted by their score. | |
* Each match has the following properties: | |
* | |
* - `subject`: The original string | |
* - `match`: Same as `subject` but each match is wrapped in | |
* `options.pre` and `options.post` | |
* - `score`: Percentage from 0...1 indicating how closely | |
* the match matches the subject | |
*/ | |
export default function fuzzySearch(haystack, needle, | |
{ | |
pre = '<', | |
post = '>', | |
caseSensitive = false | |
} = {}) | |
{ | |
const needleCase = needle | |
const needleLength = needle.length | |
if (!caseSensitive) | |
{ | |
needle = needle.toLowerCase() | |
} | |
const matches = haystack.map(subject => | |
{ | |
const subjectCase = subject | |
if (!caseSensitive) | |
{ | |
subject = subject.toLowerCase() | |
} | |
const matchGroups = [] | |
// for each character in the needle | |
// -> find the longest match starting from that character | |
// -> append it to a list where the list does not contain an | |
// overlaping match or needle portion | |
for (let needleIndex = 0; needleIndex < needleLength; needleIndex++) | |
{ | |
// skip spaces | |
if (needle[needleIndex] === ' ') continue | |
let matchIndex = null | |
let matchLength = 0 | |
while (true) | |
{ | |
// find index in subject for a portion of the needle | |
const index = subject.indexOf( | |
needle.substring( | |
needleIndex, | |
needleIndex + matchLength + 1 | |
) | |
) | |
// if we found a match, keep track of the | |
// index and character length (range) and make sure | |
// we are not exceeding the needle length | |
if (index !== -1 && needleIndex + matchLength < needleLength) | |
{ | |
matchIndex = index | |
matchLength++ | |
} | |
else break | |
} | |
// no match found? try starting from the next needle character | |
if (!matchLength) continue | |
// find a group where neither the match nor the needle range | |
// overlaps with the other match/needle ranges in the group | |
let group = matchGroups.find(group => | |
{ | |
return group.every(match => | |
{ | |
return ( | |
// match before current match | |
match[0] + match[2] <= matchIndex | |
// or match after current match | |
|| match[0] >= matchIndex + matchLength | |
) && ( | |
// needle before current needle | |
match[1] + match[2] <= needleIndex | |
// or needle after current needle | |
|| match[1] >= needleIndex + matchLength | |
) | |
}) | |
}) | |
// no group? create and append it | |
if (!group) matchGroups.push(group = []) | |
group.push([matchIndex, needleIndex, matchLength]) | |
} | |
// no match found for this subject | |
if (!matchGroups.length) | |
{ | |
return null | |
} | |
else | |
{ | |
// find the match group with the highest score | |
// for this subject | |
return matchGroups.map(group => | |
{ | |
let match = subjectCase | |
let points = 0 | |
let maxPoints = subject.length * 6 | |
group | |
.sort((a,b) => a[0] - b[0]) | |
.forEach((v,i) => | |
{ | |
const [matchIndex, needleIndex, matchLength] = v | |
const hayEnd = matchIndex + matchLength | |
const offset = i ? (pre+post).length*i : 0 | |
// one point for each character | |
points += matchLength | |
// two points for each case-matching character | |
for (let i = 0; i < matchLength; i++) | |
{ | |
if (subjectCase[matchIndex+i] === needleCase[needleIndex+i]) | |
{ | |
points += 2 | |
} | |
} | |
// three points for each index-matching character | |
if (matchIndex === needleIndex) | |
{ | |
points += matchLength * 3 | |
} | |
// wrap the matches in the pre and post strings | |
match = match.substring(0,matchIndex+offset) | |
+ pre | |
+ match.substring(matchIndex+offset,hayEnd+offset) | |
+ post | |
+ match.substring(hayEnd+offset,Infinity) | |
}) | |
return { | |
subject: subjectCase, | |
needle: needle, | |
match, | |
score: points/maxPoints, | |
meta: group | |
} | |
}) | |
.sort((a,b) => b.score - a.score)[0] | |
} | |
}) | |
// filter out subjects with no match | |
.filter(v => v) | |
// sort by score (high to low) | |
.sort((a,b) => b.score - a.score) | |
return matches.length ? matches : null | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Baisc usage: