Last active
June 29, 2024 01:21
-
-
Save kaizhu256/8ec8f605ad2b99afff08a9969d3fbfe9 to your computer and use it in GitHub Desktop.
This gist file demos a performant, self-contained function "globExclude()", which batch-globs <pathnameList> in a single pass, with given filters <excludeList>, <includeList>.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*jslint beta, node*/
// This gist file demos a performant, self-contained function "globExclude()",
// which batch-globs <pathnameList> in a single pass,
// with given filters <excludeList>, <includeList>.
//
// Could be useful if you need to glob thousands of files for test-coverage,
// or other purposes.
// Example usage
/*
Output from below code:
[
    '.eslintrc.js',
    'deps/extract.js',
    'lib/sqlite3-binding.js',
    'lib/sqlite3.js',
    'lib/trace.js',
    'tools/benchmark/insert.js'
]
Directory-entry 'lib' is not listed, because no include-pattern matches it
(pattern "lib/" only matches pathnames located under directory "lib").
*/
console.log(
    globExclude({
        excludeList: [
            "tes?/",
            "tes[!0-9A-Z_a-z-]/",
            "tes[0-9A-Z_a-z-]/",
            "tes[^0-9A-Z_a-z-]/",
            "test/**/*.js",
            "test/suppor*/*elper.js",
            "test/suppor?/?elper.js",
            "test/support/helper.js"
        ],
        includeList: [
            "**/*.cjs",
            "**/*.js",
            "**/*.mjs",
            "li*/*.js",
            "li?/*.js",
            "lib/",
            "lib/**/*.js",
            "lib/*.js",
            "lib/sqlite3.js"
        ],
        pathnameList: [
            ".",
            ".dockerignore",
            ".eslintrc.js",
            ".github",
            ".github/workflows",
            ".github/workflows/ci.yml",
            ".gitignore",
            "LICENSE",
            "README.md",
            "binding.gyp",
            "deps",
            "deps/common-sqlite.gypi",
            "deps/extract.js",
            "lib",
            "lib/sqlite3-binding.js",
            "lib/sqlite3.js",
            "lib/trace.js",
            "package.json",
            "src",
            "src/async.h",
            "src/backup.cc",
            "src/backup.h",
            "src/database.cc",
            "src/database.h",
            "src/gcc-preinclude.h",
            "src/macros.h",
            "src/node_sqlite3.cc",
            "src/statement.cc",
            "src/statement.h",
            "src/threading.h",
            "test",
            "test/.eslintrc.js",
            "test/affected.test.js",
            "test/async_calls.test.js",
            "test/backup.test.js",
            "test/blob.test.js",
            "test/cache.test.js",
            "test/constants.test.js",
            "test/database_fail.test.js",
            "test/each.test.js",
            "test/exec.test.js",
            "tools",
            "tools/benchmark",
            "tools/benchmark/insert.js"
        ]
    }).pathnameList
);

function globExclude({
    excludeList = [],
    includeList = [],
    pathnameList = []
}) {
    // This function will
    // 1. Exclude pathnames in <pathnameList> that don't match glob-patterns in
    //    <includeList> (this step is skipped if <includeList> is empty).
    // 2. Exclude pathnames in <pathnameList> that match glob-patterns in
    //    <excludeList>.
    //
    // It returns an object {excludeList, includeList, pathnameList}, where
    // <excludeList> and <includeList> are the glob-patterns compiled to
    // regexps, and <pathnameList> is the list of surviving pathnames
    // (empty-string pathnames are dropped).
    //
    // It throws an Error if any pattern or pathname contains a character
    // that would break the newline-separated, single-pass filtering below.

    function globAssertNotWeird(list, name) {
        // This function will check if <list> of strings contain weird
        // characters, and throw an Error naming the first one found.
        // "Weird" means control-chars \u0000-\u0007 (\u0000 is reserved as
        // internal marker), and every regexp line-terminator
        // (\n, \r, \u2028, \u2029), because '^', '$', '.' in the multiline
        // regexps below would otherwise split a single pathname in two.
        [
            [
                // Join by "\n" so it can serve as element-separator,
                // then look for every weird character except "\n".
                "\n", (
                    /^.*?([\u0000-\u0007\r\u2028\u2029]).*/gm
                )
            ],
            [
                // Join by "\r" so any remaining "\n" must come from an element.
                "\r", (
                    /^.*?([\n]).*/gm
                )
            ]
        ].forEach(function ([
            separator, rgx
        ]) {
            // String.replace is only used to iterate over matches -
            // the callback throws on first match.
            list.join(separator).replace(rgx, function (match0, char) {
                throw new Error(
                    "Weird character "
                    + JSON.stringify(char)
                    + " found in " + name + " "
                    + JSON.stringify(match0)
                );
            });
        });
    }

    function globToRegexp(pattern) {
        // This function will translate glob <pattern> to javascript-regexp,
        // which javascript can then use to "glob" pathnames.
        // The regexp has flags "gm", and is meant to be run against a string
        // of newline-separated pathnames, matching whole lines only.
        // NOTE: bracket-classes are emitted nearly verbatim, so unlike
        // glob(7), a class may match '/'.
        let classList;
        let ii = 0;
        let isClass = false;
        let strClass = "";
        let strRegex = "";
        // Collapse redundant separators '//' and wildcards '***'.
        let strPattern = pattern.replace((
            /\/\/+/g
        ), "/").replace((
            /\*\*\*+/g
        ), "**");
        // Split pattern into text outside of bracket-classes <strRegex>,
        // and text inside of bracket-classes <strClass>.
        // Each class is replaced by placeholder "\u0000" in <strRegex>,
        // and preceded by separator "\u0000" in <strClass>.
        (strPattern.match(
            /\\\\|\\\[|\\\]|\[|\]|./g
        ) || []).forEach(function (token) {
            if (token === "[" && !isClass) {
                strClass += "\u0000";
                strRegex += "\u0000";
                isClass = true;
                return;
            }
            if (token === "]") {
                if (isClass) {
                    isClass = false;
                } else {
                    strRegex += "]";
                }
                return;
            }
            if (isClass) {
                strClass += token;
            } else {
                strRegex += token;
            }
        });
        strClass += "\u0000";
        // An expression "[!...]" matches a single character, namely any
        // character that is not matched by the expression obtained by
        // removing the first '!' from it.
        // (Thus, "[!a-]" matches any single character except 'a', and '-'.)
        strClass = strClass.replace((
            /\u0000!/g
        ), "\u0000^");
        // One may include '-' in its literal meaning by making it the first
        // or last character between the brackets.
        strClass = strClass.replace((
            /\u0000-/g
        ), "\u0000\\-");
        strClass = strClass.replace((
            /-\u0000/g
        ), "\\-\u0000");
        // Escape brackets '[', ']' in character class.
        strClass = strClass.replace((
            /[\[\]]/g
        ), "\\$&");
        // Escape regexp-special characters outside of classes.
        // https://stackoverflow.com/questions/3561493
        // /is-there-a-regexp-escape-function-in-javascript
        // $()*+-./?[\]^{|}
        strRegex = strRegex.replace((
            // Ignore [-/].
            /[$()*+.?\[\\\]\^{|}]/g
        ), "\\$&");
        // Expand wildcard '**/*'.
        strRegex = strRegex.replace((
            /\\\*\\\*\/(?:\\\*)+/g
        ), ".*?");
        // Expand wildcard '**'.
        strRegex = strRegex.replace((
            /(^|\/)\\\*\\\*(\/|$)/gm
        ), "$1.*?$2");
        // Expand wildcard '*'.
        // Newline is excluded, so wildcard cannot match across two pathnames
        // in the newline-separated string.
        strRegex = strRegex.replace((
            /(?:\\\*)+/g
        ), "[^\\/\\n]*?");
        // Expand wildcard '?'.
        // Newline is excluded for same reason as above.
        strRegex = strRegex.replace((
            /\\\?/g
        ), "[^\\/\\n]");
        // Expand directory-with-trailing-slash '.../'.
        strRegex = strRegex.replace((
            /\/$/gm
        ), "\\/.*?");
        // Merge strClass into strRegex.
        classList = strClass.split("\u0000");
        strRegex = strRegex.replace((
            /\u0000/g
        ), function () {
            ii += 1;
            if (classList[ii] === "") {
                return "";
            }
            // A negated class would match the newline separating two
            // pathnames - guard it with a lookahead.
            if (classList[ii].startsWith("^")) {
                return "(?!\\n)[" + classList[ii] + "]";
            }
            return "[" + classList[ii] + "]";
        });
        // Change strRegex from string to regexp.
        return new RegExp("^" + strRegex + "$", "gm");
    }

    let excludeRegexList;
    let includeRegexList;
    let pathnameText;
    // Validate excludeList, includeList, pathnameList.
    globAssertNotWeird(excludeList, "pattern");
    globAssertNotWeird(includeList, "pattern");
    globAssertNotWeird(pathnameList, "pathname");
    // Optimization
    // Concat pathnames into a single, newline-separated string,
    // whose pathnames can all be filtered with a single, regexp-pass.
    pathnameText = pathnameList.join("\n");
    // 1. Exclude pathnames in <pathnameList> that don't match glob-patterns in
    //    <includeList>.
    includeRegexList = includeList.map(globToRegexp);
    if (includeRegexList.length > 0) {
        // Mark every matching line with prefix "\u0000".
        includeRegexList.forEach(function (rgx) {
            pathnameText = pathnameText.replace(rgx, "\u0000$&");
        });
        // Blank every unmarked line.
        // "\n" is excluded from first character, so an empty line cannot
        // swallow its newline together with the marked line after it.
        pathnameText = pathnameText.replace((
            /^[^\u0000\n].*/gm
        ), "");
        // Remove marks.
        pathnameText = pathnameText.replace((
            /^\u0000+/gm
        ), "");
    }
    // 2. Exclude pathnames in <pathnameList> that match glob-patterns in
    //    <excludeList>.
    excludeRegexList = excludeList.map(globToRegexp);
    excludeRegexList.forEach(function (rgx) {
        pathnameText = pathnameText.replace(rgx, "");
    });
    // Split newline-separated pathnames back to list,
    // dropping lines that were blanked above.
    return {
        excludeList: excludeRegexList,
        includeList: includeRegexList,
        pathnameList: pathnameText.split("\n").filter(function (elem) {
            return elem;
        })
    };
}
export { | |
globExclude | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment