Last active
February 2, 2022 21:27
-
-
Save vpalos/4334557 to your computer and use it in GitHub Desktop.
JS: A simple search function designed for filtering large lists of strings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Demo: http://vpalos.com/sandbox/filter.js/ | |
* | |
* A generic search algorithm designed for filtering (very) large lists of strings; when an input string | |
* contains all the parts (words or characters; whitespace is ignored) of the query, spread-out over the text | |
* then the string is considered to be a match. It works with the way internet browsers (e.g. Firefox, Google | |
* Chrome) filter address-bar suggestions on user input. It is also quite fast; on my i7 laptop, filtering | |
* 1) a list of ~23000 items takes around 50ms (yes, milliseconds!); | |
* 2) a list of ~1 million text items took under 1 second. | |
* It works both in NodeJS as well as in browser environments (so far I only tested FF and GC). | |
* | |
* It has two functioning modes: | |
* 1) word-mode: each (whitespace-separated) word in the input query must be found **whole** in the text: | |
* e.g. "foo bar" will match "123foo456bar789" but not "f oo ba r"; | |
* 2) charater-mode: the input query is matched per-character (whitespace is completely ignored): | |
* e.g. "foo bar" will match "f o o b a r" and even "-f.oo-ba.r-". | |
* | |
* Options (values below are the defaults): | |
* { | |
* "case": false, // true: case-sensitive | |
* // false: case-insensitive | |
* | |
* "mark": true, // true: returns item numbers + matches with highlighted captures | |
* // false: returns item numbers only | |
* | |
* "prefix": "<strong>", // highlight prefix string | |
* "suffix": "</strong>", // highlight suffix string | |
* | |
* "word": true, // true: whole-word mode | |
* // false: character mode | |
* | |
* "limit": 0 // limit results to this amount | |
* // 0 means return the whole result (unlimited) | |
* } | |
* | |
* @param {string} query The search query, consisting of space-separated chunks of characters. | |
* @param {string|array} items A string or an array of strings to filter; if a string is given then it is | |
* first split into an array of individual lines of text and then filtered (note | |
* that). | |
* @param {string} prefix String which will come before every highlighted substring (i.e. capture). | |
* @param {string} suffix String which will come after every highlighted substring (i.e. capture). | |
* @return {object} Returns the following object with matched items information: | |
* { | |
* "items": [...], // array of matched item-numbers | |
* "marks": [...] // if mark == true, array of matches with highlighted captures | |
* } | |
*/ | |
function filter(query, items, options) { | |
// option producer | |
function option(name, value) { | |
options = options || {}; | |
return typeof(options[name]) !== 'undefined' ? options[name] : value; | |
} | |
// prepare options | |
var o_case = option("case", false); | |
var o_mark = option("mark", true); | |
var o_prefix = option("prefix", "<strong>"); | |
var o_suffix = option("suffix", "</strong>"); | |
var o_word = option("word", true); | |
var o_limit = option("limit", 0); | |
// prepare query | |
query = o_case ? query : query.toLowerCase(); | |
query = query.replace(/\s+/g, o_word ? ' ' : ''); | |
query = query.replace(/(^\s+|\s+$)/g, ''); | |
query = query.split(o_word ? ' ' : ''); | |
var ql = query.length; | |
// prepare items | |
if (typeof(items) === "string") { | |
items = items.split('\n'); | |
} | |
// prepare results | |
var matches = { | |
items: [], | |
marks: [] | |
}; | |
// search | |
for (var ii = 0, il = items.length; ii < il; ii++) { | |
// prepare text | |
var text = o_case ? items[ii] : items[ii].toLowerCase(); | |
var mark = ""; | |
// traverse | |
var ti = 0; | |
var wi = 0; | |
var wl = 0; | |
for (var qi = 0; qi < ql; qi++) { | |
wl = query[qi].length; | |
wi = text.indexOf(query[qi], ti); | |
if (wi === -1) { | |
break; | |
} | |
if (o_mark) { | |
if (wi > 0) { | |
mark += items[ii].slice(ti, wi); | |
} | |
mark += o_prefix + items[ii].slice(wi, wi + wl) + o_suffix; | |
} | |
ti = wi + wl; | |
} | |
// capture | |
if (qi == ql) { | |
if (o_mark) { | |
mark += items[ii].slice(ti); | |
matches.marks.push(mark); | |
} | |
if (matches.items.push(ii) === o_limit && o_limit) { | |
break; | |
} | |
} | |
} | |
// ready | |
return matches; | |
} |
How I would like to filter first char instead of whole line?
Hi, what's the license on this code? MIT?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you describe the algrorithm, it will be useful for people like me,
UPDATE : I am trying understand the code a couple of hours, Only the logic that I got is using
indexOf
, rest of the code emphasize the results only ( I mean secondfor...loop
andcapture
part) it is still super fast, definetely. I might expecting filter function backed by some known algorithm, but I couldn't match any. If you do, please enlight me.