-
-
Save mikedeboer/685239acc409542af48a to your computer and use it in GitHub Desktop.
Node.JS wrapped GREP search command builder... ack on steroids, if you will!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/* Any copyright is dedicated to the Public Domain. | |
* http://creativecommons.org/publicdomain/zero/1.0/ */ | |
"use strict"; | |
var Fs = require("fs"); | |
var Path = require("path"); | |
var Spawn = require("child_process").spawn; | |
/**
 * Backslash-escape every character of `str` that is special in a JavaScript
 * (or Perl) regular expression, including the "/" delimiter.
 *
 * @param {String} str text to escape
 * @returns {String} the escaped text
 */
function escapeRegExp(str) {
    var specialChars = /([.*+?\^${}()|\[\]\/\\])/g;
    return str.replace(specialChars, function(match, ch) {
        return "\\" + ch;
    });
}
/**
 * Backslash-escape regular expression metacharacters, plus commas, hashes,
 * whitespace and both quote characters. Pattern taken from http://xregexp.com/
 *
 * @param {String} str text to escape
 * @returns {String} the escaped text
 */
function grepEscapeRegExp(str) {
    var escapable = /[[\]{}()*+?.,\\^$|#\s"']/g;
    return str.replace(escapable, function(ch) {
        return "\\" + ch;
    });
}
/**
 * Backslash-escape backslashes, quotes, backticks, dollar signs, whitespace,
 * parentheses and angle brackets in `str`.
 *
 * @param {String} str text to escape
 * @returns {String} the escaped text
 */
function escapeShell(str) {
    var shellChars = /([\\"'`$\s\(\)<>])/g;
    return str.replace(shellChars, function(match, ch) {
        return "\\" + ch;
    });
}
/**
 * Remove duplicate values from an array IN PLACE, keeping the first
 * occurrence of each value (compared with strict equality).
 *
 * @param {Array} arr array to de-duplicate; it is modified
 * @returns {Array} the same array instance, now without duplicates
 */
function makeUnique(arr) {
    var total = arr.length;
    var kept = 0;
    for (var read = 0; read < total; read++) {
        var item = arr[read];
        var seen = false;
        // Only the already-compacted prefix [0, kept) holds accepted values.
        for (var k = 0; k < kept && !seen; k++)
            seen = (arr[k] === item);
        if (!seen)
            arr[kept++] = item;
    }
    arr.length = kept;
    return arr;
}
/**
 * Removes trailing whitespace, or trailing characters from `charlist`.
 * version: 1107.2516
 * from: http://phpjs.org/functions/rtrim
 * original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
 * example 1: rtrim('  Kevin van Zonneveld  ');
 * returns 1: '  Kevin van Zonneveld'
 *
 * @param {String} str       value to trim (coerced to a string)
 * @param {String} charlist  optional set of characters to strip; every
 *                           character is taken literally. When omitted or
 *                           empty, whitespace and U+00A0 are stripped.
 * @returns {String} `str` without the trailing characters
 */
function rtrim(str, charlist) {
    var chars;
    if (!charlist) {
        chars = " \\s\u00A0";
    } else {
        // Escape everything that is special inside a character class. The
        // backslash and the hyphen matter most: an unescaped "\" produced an
        // invalid expression ("[\]+$") and "a-z" silently became a range.
        chars = (charlist + "").replace(/([\[\]\(\)\.\?\/\*\{\}\+\$\^\:\\\-])/g, "\\$1");
    }
    var re = new RegExp("[" + chars + "]+$", "g");
    return (str + "").replace(re, "");
}
// Set to 1 to print diagnostic output (parsed options, generated command).
var DEBUG = 0;

// Names that are excluded from every search, mapped to the tool that creates
// them. Only the keys are used to build the exclude patterns.
var IGNORE_DIRS = {
    ".bzr": "Bazaar",
    ".cdv": "Codeville",
    "~.dep": "Interface Builder",
    "~.dot": "Interface Builder",
    "~.nib": "Interface Builder",
    "~.plst": "Interface Builder",
    ".git": "Git",
    ".hg": "Mercurial",
    ".pc": "quilt",
    ".svn": "Subversion",
    "_MTN": "Monotone",
    "blib": "Perl module building",
    "CVS": "CVS",
    "RCS": "RCS",
    "SCCS": "SCCS",
    "_darcs": "darcs",
    "_sgbak": "Vault/Fortress",
    "autom4te.cache": "autoconf",
    "cover_db": "Devel::Cover",
    "_build": "Module::Build"
};
// File type name -> list of file name suffixes belonging to that type.
// All suffixes together form the default --include pattern, in this order.
// The ack types "binary" and "skipped" have no suffix list and are left out.
var MAPPINGS = {
    "actionscript": ["as", "mxml"],
    "ada": ["ada", "adb", "ads"],
    "asm": ["asm", "s"],
    "batch": ["bat", "cmd"],
    "cc": ["c", "h", "xs"],
    "cfmx": ["cfc", "cfm", "cfml"],
    "clojure": ["clj"],
    "cpp": ["cpp", "cc", "cxx", "m", "hpp", "hh", "h", "hxx"],
    "csharp": ["cs"],
    "css": ["css", "less", "scss", "sass"],
    "coffee": ["coffee"],
    "elisp": ["el"],
    "erlang": ["erl", "hrl"],
    "fortran": ["f", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"],
    "haskell": ["hs", "lhs"],
    "hh": ["h"],
    "html": ["htm", "html", "shtml", "xhtml"],
    "jade": ["jade"],
    "java": ["java", "properties"],
    "groovy": ["groovy"],
    "js": ["js"],
    "json": ["json"],
    "latex": ["latex", "ltx"],
    "jsp": ["jsp", "jspx", "jhtm", "jhtml"],
    "lisp": ["lisp", "lsp"],
    "lua": ["lua"],
    "make": ["makefile", "Makefile"],
    "mason": ["mas", "mhtml", "mpl", "mtxt"],
    "markdown": ["md", "markdown"],
    "objc": ["m", "h"],
    "objcpp": ["mm", "h"],
    "ocaml": ["ml", "mli"],
    "parrot": ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg"],
    "perl": ["pl", "pm", "pod", "t"],
    "php": ["php", "phpt", "php3", "php4", "php5", "phtml"],
    "plone": ["pt", "cpt", "metadata", "cpy", "py"],
    "powershell": ["ps1"],
    "python": ["py"],
    "rake": ["rakefile"],
    "ruby": ["rb", "ru", "rhtml", "rjs", "rxml", "erb", "rake", "gemspec"],
    "scala": ["scala"],
    "scheme": ["scm", "ss"],
    "shell": ["sh", "bash", "csh", "tcsh", "ksh", "zsh"],
    "smalltalk": ["st"],
    "sql": ["sql", "ctl"],
    "tcl": ["tcl", "itcl", "itk"],
    "tex": ["tex", "cls", "sty"],
    "text": ["txt"],
    "textile": ["textile"],
    "tt": ["tt", "tt2", "ttml"],
    "vb": ["bas", "cls", "frm", "ctl", "vb", "resx"],
    "vim": ["vim"],
    "yaml": ["yaml", "yml"],
    "xml": ["xml", "dtd", "xslt", "ent", "rdf", "rss", "svg", "wsdl", "atom", "mathml", "mml"]
};
// Flatten every suffix list of MAPPINGS into one array (duplicates removed
// by makeUnique, in place).
var exts = Object.keys(MAPPINGS).reduce(function(all, type) {
    return all.concat(MAPPINGS[type]);
}, []);
// Comma-separated suffixes, used inside a shell brace expansion for --include.
var PATTERN_EXT = makeUnique(exts).join(",");

// Names of everything that must be skipped while searching.
var dirs = makeUnique(Object.keys(IGNORE_DIRS));
// The ignore names joined with "|" and then regexp-escaped as a whole.
var PATTERN_DIR = escapeRegExp(dirs.join("|"));
// Comma-separated ignore names, used inside a brace expansion for --exclude.
var PATTERN_EDIR = dirs.join(",");

// Executables used to build the search / replace command line.
var GREP_CMD = "grep";
var PERL_CMD = "perl";

// Name of the file whose header was printed last by parseSearchResult().
var PREVIOUS_FILE;
/**
 * Declaration of every command line argument, keyed by its long name.
 *
 * Fields per argument:
 *   short - one-letter alias; must be unique across all arguments
 *   type  - "boolean" (flag without a value), "string" or "number"
 *   def   - default value used when the argument is not given
 *   index - position for arguments that may be passed without a name
 *   req   - true when the argument is mandatory
 *   desc  - help text printed in the usage overview
 */
var ARGS_MAP = {
    casesensitive: {
        short: "i",
        type: "boolean",
        def: false,
        desc: "whether to match strings sensitive to upper or lower case characters"
    },
    pattern: {
        short: "p",
        type: "string",
        desc: "a comma-separated list of file patterns to include in the search"
    },
    maxresults: {
        short: "m",
        type: "number",
        desc: "the maximum number of search results to return"
    },
    wholeword: {
        short: "w",
        type: "boolean",
        def: false,
        desc: "only match strings that are delimited by non-word characters"
    },
    query: {
        short: "q",
        type: "string",
        index: 0,
        req: true,
        desc: "string to search for. The needle. Put quotes around queries with multiple words or regular expressions"
    },
    path: {
        // No short alias: "p" belongs to `pattern` (as the usage text shows).
        // Declaring it here as well overwrote that alias in the short-name
        // lookup table. `path` stays reachable positionally and as --path.
        type: "string",
        def: process.cwd(),
        index: 1,
        desc: "specific path to use as the root of the search operation. Defaults to the current working dir"
    },
    replaceAll: {
        short: "a",
        type: "boolean",
        def: false,
        desc: "during a replace action, replace all occurrences"
    },
    replace: {
        short: "r",
        type: "string",
        def: false,
        desc: "string to replace a match with"
    },
    regexp: {
        short: "x",
        type: "boolean",
        def: false,
        desc: "whether [query] should be interpreted as a regular expression"
    }
};
// Lookup tables derived from ARGS_MAP:
//   ARGS_MAP_SHORT   - short alias -> long argument name
//   ARGS_MAP_INDEXED - argument definitions that may be passed positionally
//   ARG_NAMES        - all long argument names, in declaration order
var ARGS_MAP_SHORT = {};
var ARGS_MAP_INDEXED = [];
var ARG_NAMES = Object.keys(ARGS_MAP);

ARG_NAMES.forEach(function(argName) {
    var def = ARGS_MAP[argName];
    // Let every definition know its own long name.
    def.name = argName;

    if (def.short)
        ARGS_MAP_SHORT[def.short] = argName;

    var position = def.index;
    if (typeof position != "number")
        return;
    // Insert at the declared position; anything that passes neither test
    // is appended at the end instead.
    var insertAt = (position >= 0 || position < ARGS_MAP_INDEXED.length - 1)
        ? position
        : Infinity;
    ARGS_MAP_INDEXED.splice(insertAt, 0, def);
});
/**
 * Format one line of the usage overview for a single argument.
 *
 * @param {Object}  arg      argument definition (name, short, desc, req, def)
 * @param {Boolean} indexed  true for positional arguments, which are listed
 *                           without the "-x/ --" prefixes
 * @returns {String} "<name>\t\t<description> [required|default=…|optional]."
 */
function usageDef(arg, indexed) {
    var label = "";
    if (!indexed) {
        if (arg.short)
            label += "-" + arg.short + "/ ";
        label += "--";
    }
    label += arg.name;

    var note;
    if (arg.req)
        note = " [required]";
    else if (typeof arg.def != "undefined")
        note = " [default=" + String(arg.def) + "]";
    else
        note = " [optional]";

    // The closing period used to be bound to the non-required branch only,
    // so required arguments were printed without it.
    return label + "\t\t" + (arg.desc ? arg.desc : "") + note + ".";
}
/**
 * Print the usage synopsis followed by one description line per argument,
 * then terminate the process with exit code 1.
 */
function usage() {
    // Named arguments are listed alphabetically; positional ones are taken
    // out of that list and handled separately.
    var namedArgs = [].concat(ARG_NAMES).sort();
    var positionalArgs = [];
    ARGS_MAP_INDEXED.forEach(function(arg) {
        namedArgs.splice(namedArgs.indexOf(arg.name), 1);
        positionalArgs.push(arg.name);
    });

    var cmd = "cs ";
    var synopsis = [];
    var descriptions = [];

    namedArgs.forEach(function(argName) {
        var arg = ARGS_MAP[argName];
        var alias = arg.short ? "-" + arg.short + "|" : "";
        var valueHint = arg.type == "boolean" ? "" : " " + arg.type;
        descriptions.push(usageDef(arg));
        synopsis.push("[" + alias + "--" + argName + valueHint + "]");
    });

    positionalArgs.forEach(function(argName) {
        var arg = ARGS_MAP[argName];
        // Positional descriptions go in front of the named ones.
        descriptions.unshift(usageDef(arg, true));
        synopsis.push(arg.req ? argName : "[" + argName + "]");
    });

    console.log("Usage: " + cmd + synopsis.join(" "));
    console.log("\n\n" + descriptions.join("\n"));
    process.exit(1);
}
function parseArgs() { | |
var options = {}; | |
// Remove 'node' and script path from the argv list. | |
process.argv.splice(0, 2); | |
var argCount = process.argv.length; | |
if (!argCount) | |
return usage(); | |
var noNameIdx = -1; | |
var currName, currArg; | |
for (; argCount >= 0; --argCount) { | |
currName = (process.argv[argCount - 1] || "").replace(/^[-]*/g, ""); | |
currArg = ARGS_MAP[currName] || ARGS_MAP[ARGS_MAP_SHORT[currArg]]; | |
if (!currArg) { | |
// Un-named, indexed arguments. | |
currArg = ARGS_MAP_INDEXED[noNameIdx++]; | |
if (!currArg) | |
continue; | |
options[currArg.name] = process.argv[argCount]; | |
} else { | |
var bool = (currArg.type == "boolean"); | |
options[currArg.name] = bool ? true : process.argv[argCount]; | |
if (!bool) | |
--argCount; | |
} | |
} | |
ARG_NAMES.forEach(function(arg) { | |
if (typeof options[arg] != "undefined") | |
return; | |
if (DEBUG) | |
console.log("DEBUG:: unused arg: ", arg, ARGS_MAP[arg].def) | |
if (typeof ARGS_MAP[arg].def != "undefined") | |
options[arg] = ARGS_MAP[arg].def; | |
else if (ARGS_MAP[arg].type == "boolean") | |
options[arg] = false; | |
else | |
options[arg] = null; | |
}); | |
return options; | |
} | |
// Running totals over all parsed output: matching lines and distinct files.
var count = 0;
var filecount = 0;

/**
 * Turn raw "file:line:text" output into a report grouped per file and write
 * it to stdout. Updates `count`, `filecount` and `PREVIOUS_FILE`.
 *
 * @param {String} res       chunk of raw output; non-strings count as empty
 * @param {String} basePath  search root; its first occurrence is removed from
 *                           the file name and from each colon-separated piece
 *                           of the matched text
 * @param {Object} options   parsed command line options (currently unused)
 */
function parseSearchResult(res, basePath, options) {
    var text = typeof res == "string" ? res : "";
    var report = "";

    // The capture group keeps the line breaks as entries of their own; they
    // never contain two colons and are therefore skipped below.
    text.split(/([\n\r]+)/g).forEach(function(line) {
        var fields = line.split(":");
        if (fields.length < 3)
            return;

        var file = encodeURI(rtrim(fields[0].replace(basePath, "")));
        var lineno = parseInt(fields[1], 10);
        if (!lineno)
            return;

        count += 1;
        if (file !== PREVIOUS_FILE) {
            // First hit in another file: start a new section.
            filecount += 1;
            report += (PREVIOUS_FILE ? "\n\n" : "") + file + ":";
            PREVIOUS_FILE = file;
        }

        var matched = fields.slice(2).map(function(piece) {
            return piece.replace(basePath, "");
        }).join(":");
        report += "\n\t" + lineno + ": " + matched;
    });

    process.stdout.write(report);
}
/**
 * Quote a value so that bash passes it on as exactly one, unmodified word:
 * wrap it in single quotes and spell embedded single quotes as '\''.
 *
 * @param {String} str value to quote
 * @returns {String} the quoted word
 */
function shellQuote(str) {
    return "'" + String(str).replace(/'/g, "'\\''") + "'";
}

/**
 * Build the bash command line that performs the search and, when
 * `options.replaceAll` is set, the replacement.
 *
 * Search: a recursive grep printing "file:line:text". Unless `options.regexp`
 * is set the query is matched literally (-F); otherwise it is handed to grep
 * unchanged as a regular expression.
 *
 * Replace: grep only lists the matching files (-l); these are piped through
 * xargs into an in-place perl substitution that prints every changed line as
 * "file:line:text". The replacement text is read from `options.replacement`
 * or, if that is not set, from `options.replace`; it defaults to "".
 *
 * @param {Object} options parsed options: query, path, pattern, maxresults,
 *                         wholeword, casesensitive, regexp, replaceAll,
 *                         replace / replacement
 * @returns {String} the command, to be run with `bash -c`
 */
function buildGrepCommand(options) {
    var literal = !options.regexp;
    var query = String(options.query);
    var words = [GREP_CMD, "-s", "-r", "--color=never", "--binary-files=without-match", "-n"];

    if (!options.casesensitive)
        words.push("-i");

    if (options.maxresults) {
        var maxResults = parseInt(options.maxresults, 10);
        // Skip the limit rather than passing "-m NaN" to grep.
        if (!isNaN(maxResults))
            words.push("-m", maxResults);
    }
    if (options.wholeword)
        words.push("-w");

    // A literal query needs no escaping at all with fixed-string matching.
    // Backslash-escaping it only works for PCRE; in grep's default syntax
    // "\(" and "\{" START a group or interval.
    if (literal)
        words.push("-F");

    var include;
    if (options.pattern) { // handles grep peculiarities with --include
        include = options.pattern.split(",").length > 1
            ? "{" + options.pattern + "}"
            : options.pattern;
    } else {
        include = "\\*{" + PATTERN_EXT + "}";
    }
    // Both patterns must stay unquoted: they rely on bash brace expansion to
    // turn into one --exclude / --include option per entry.
    words.push("--exclude=*{" + PATTERN_EDIR + "}*", "--include=" + include);

    // "-e" keeps a query with a leading dash from being read as an option,
    // without having to alter dashes inside the pattern (which broke "[a-z]").
    words.push("-e", shellQuote(query));
    // Single quotes, not double quotes: inside double quotes bash keeps most
    // backslashes, so a backslash-escaped space ended up in the path itself.
    words.push(shellQuote(options.path));

    if (options.replaceAll) {
        var replacement = options.replacement;
        if (replacement === undefined)
            replacement = options.replace; // the name used on the command line
        if (replacement === undefined || replacement === null || replacement === false)
            replacement = "";
        replacement = String(replacement);

        var perlPattern = query;
        if (literal) {
            // Neutralise everything perl would interpret in either half of
            // s/…/…/: metacharacters, the "/" delimiter and "$" / "@" sigils.
            perlPattern = escapeRegExp(query).replace(/@/g, "\\@");
            replacement = replacement.replace(/([\\\/$@])/g, "\\$1");
        }

        // print the changed line to STDOUT (to arrange in parseSearchResult())
        // if the substitution did the actual replace
        var script = "print STDOUT \"$ARGV:$.:$_\""
            + " if s/" + perlPattern + "/" + replacement + "/mg"
            + (!options.casesensitive ? "i" : "") + ";";

        // pipe the grep results (file names only) into perl
        words.push("-l", "|", "xargs", PERL_CMD, "-pi", "-e", shellQuote(script));
    }

    return words.join(" ");
}
/**
 * Entry point: parse the command line, build the search command, run it in
 * bash and stream its output through parseSearchResult(). A JSON-like
 * summary with the match and file counts is printed once all output has been
 * processed.
 */
function main() {
    var options = parseArgs();
    if (DEBUG)
        console.log("DEBUG:: options: ", options);

    var cmd = buildGrepCommand(options);
    if (DEBUG)
        console.log("DEBUG:: search command: " + cmd);

    var child;
    try {
        child = Spawn("/bin/bash", ["-c", cmd]);
    } catch (e) {
        console.error("Could not spawn grep process");
        process.exit(1);
    }
    // A shell that cannot be started is reported through the "error" event,
    // not by an exception from Spawn().
    child.on("error", function(err) {
        console.error("Could not spawn grep process: " + err.message);
        process.exit(1);
    });

    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    /**
     * Create a per-stream buffer that forwards only complete lines to
     * parseSearchResult() and keeps a trailing partial line until the rest of
     * it arrives. Previously only the latest chunk was parsed, so a line
     * split across two chunks was lost or mangled.
     */
    function createLineBuffer() {
        var pending = "";
        return {
            push: function(data) {
                if (!data)
                    return;
                pending += data;
                var lastBreak = pending.lastIndexOf("\n");
                if (lastBreak < 0)
                    return;
                var complete = pending.slice(0, lastBreak + 1);
                pending = pending.slice(lastBreak + 1);
                if (DEBUG)
                    console.log("DEBUG:: " + complete);
                parseSearchResult(complete, options.path, options);
            },
            flush: function() {
                if (!pending)
                    return;
                var rest = pending;
                pending = "";
                parseSearchResult(rest, options.path, options);
            }
        };
    }

    // One buffer per stream, so interleaved chunks cannot corrupt each other.
    var stdoutLines = createLineBuffer();
    var stderrLines = createLineBuffer();
    child.stdout.on("data", stdoutLines.push);
    child.stderr.on("data", stderrLines.push);

    // "close" fires after the stdio streams have ended; "exit" may fire while
    // output is still pending, which would print the summary too early.
    child.on("close", function(code, signal) {
        stdoutLines.flush();
        stderrLines.flush();
        process.stdout.write("\nResults: {\"count\": " + count + ", \"filecount\":"
            + filecount + "}\n");
    });
}

main();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Usage: cs [-i|--casesensitive] [-m|--maxresults number] [-p|--pattern string] [-x|--regexp] [-r|--replace string] [-a|--replaceAll] [-w|--wholeword] query [path] | |
path specific path to use as the root of the search operation. Defaults to the current working dir [default=/Users/mikedeboer/Projects/jsDAV]. | |
query string to search for. The needle. Put quotes around queries with multiple words or regular expressions [required] | |
-i/ --casesensitive whether to match strings sensitive to upper or lower case characters [default=false]. | |
-m/ --maxresults the maximum number of search results to return [optional]. | |
-p/ --pattern a comma-separated list of file patterns to include in the search [optional]. | |
-x/ --regexp whether [query] should be interpreted as a regular expression [default=false]. | |
-r/ --replace string to replace a match with [default=false]. | |
-a/ --replaceAll during a replace action, replace all occurrences [default=false]. | |
-w/ --wholeword only match strings that are delimited by non-word characters [default=false]. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment