Skip to content

Instantly share code, notes, and snippets.

@cdosborn
Forked from sod/giganticString.js
Last active February 5, 2024 18:38
Show Gist options
  • Save cdosborn/32c2a4a688f9c8eec41c3aac1115aa18 to your computer and use it in GitHub Desktop.
Save cdosborn/32c2a4a688f9c8eec41c3aac1115aa18 to your computer and use it in GitHub Desktop.
A modified interface to the SOD search algorithm ;)
import escapeStringRegexp from "escape-string-regexp";
import bs from "binary-search";
// interface SearchFunc<T> {
// (needle?: string, maxResults: number): T[]
// }
// search = searcher(d => [d.name, d.age], [{name:"a", age:4}, ...])
// search("4") -> {name:"a", age:4}
const searcher = <T>(
accessor: (_d: T) => string[],
array: T[]
): ((needle: string, maxResults?: number) => T[]) => {
const offsets: number[] = [];
let haystack = "";
for (let i = 0; i < array.length; i++) {
const keys = accessor(array[i]);
for (let k = 0; k < keys.length; k++) {
let key = keys[k];
key = key || ""; // Coerce null/undefined to ""
key = key + ""; // Coerce to a string type
// EsLint warns against putting \x00 in a regex, because users are
// unlikely to supply it as input. While its unlikely, it would break
// this function, if our delimiter (\x00) were provided by the user
//
// eslint-disable-next-line
key = key.replace(/\x00/g, "");
haystack += "\x00" + key;
}
offsets.push(haystack.length);
}
return (needle?: string, maxResults = array.length) => {
// See note above
//
// eslint-disable-next-line
needle = (needle || "").replace(/\x00/g, "");
needle = escapeStringRegexp(needle);
if (!needle) {
return array.slice(0, maxResults);
}
const results = [];
const rx = new RegExp(needle, "gi");
const uniqueIndexes: { [index: number]: boolean } = {};
let result;
while (results.length < maxResults && (result = rx.exec(haystack))) {
const matchIndex = result.index;
let index = bs(offsets, matchIndex, (a, b) => a - b);
if (index < 0) {
index = (index + 1) * -1;
}
if (index in uniqueIndexes) {
// searching for 6 in 666, would match that term 3 times, only want to
// record the first time it resolves to the index for 666
continue;
}
uniqueIndexes[index] = true;
results.push(array[index]);
}
return results;
};
};
export default searcher;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment