Skip to content

Instantly share code, notes, and snippets.

@mlhaufe
Last active December 19, 2015 07:09
Show Gist options
  • Save mlhaufe/5916503 to your computer and use it in GitHub Desktop.
Save mlhaufe/5916503 to your computer and use it in GitHub Desktop.
Quick and dirty WScript email crawler. Usage, from a command prompt: "cscript foo.js"
// Crawler configuration and shared state (Windows Script Host / JScript).
var url = "http://thenewobjective.com/blog/",   // page the crawl starts from
    // Pattern used to pull addresses out of page markup. The top-level domain
    // may be 2-63 letters (63 is the maximum length of a DNS label); the
    // previous 6-letter cap silently missed addresses under longer TLDs such
    // as ".photography" or ".technology".
    email = /\b[A-Z0-9+_.-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,63}\b/gi,
    delay = 2000,        // pause, in milliseconds, taken before each page fetch
    visited = {},        // normalised page URL -> true once a page has been crawled
    hostname,            // host of the start page; pages on other hosts are skipped
    // An in-memory HTML document is used only as a factory for the anchor below.
    baseDocument = new ActiveXObject("htmlfile"),
    // Scratch anchor: assigning its href lets the host parse a URL and expose
    // the parts (hash, hostname) that the crawler inspects.
    a = baseDocument.createElement("a");

a.href = url;
hostname = a.hostname;
/**
 * Returns a new array holding the first occurrence of each distinct value of
 * this array, in original order. The receiver is not modified.
 *
 * Values are compared by their string form, so 1 and "1" count as the same
 * value (the first one encountered is kept).
 *
 * @returns {Array} De-duplicated copy of this array.
 */
Array.prototype.undupe = function () {
    var seen = {},
        result = [],
        owns = Object.prototype.hasOwnProperty;

    for (var i = 0, len = this.length; i < len; ++i) {
        var val = this[i],
            // The "$" prefix plus the own-property test keep values such as
            // "constructor", "toString" or "__proto__" from colliding with
            // members every object inherits; without them such values were
            // treated as already seen and dropped from the result.
            key = "$" + String(val);

        if (!owns.call(seen, key)) {
            seen[key] = 1;
            result.push(val);
        }
    }
    return result;
};
/**
 * Crawls `url` and, recursively, every page it links to on the start host,
 * collecting each e-mail address found in the page markup.
 *
 * Uses the script-level `a` (scratch anchor for URL parsing), `hostname`,
 * `visited`, `email` and `delay`. Pages on another host and pages already
 * crawled are skipped. Failures while loading or parsing a page are reported
 * with WScript.Echo and do not stop the crawl.
 *
 * @param {string} url      Address of the page to crawl.
 * @param {Array}  results  Accumulator; addresses are appended to it in place
 *                          (duplicates are not removed here).
 * @returns {Array} The same `results` array.
 */
function getAddresses(url, results) {
    // Upper bound on waiting for a page to finish loading, and the polling
    // step, both in milliseconds. Without a bound a page that never reaches
    // "complete" would hang the whole crawl.
    var MAX_LOAD_WAIT = 30000,
        POLL_INTERVAL = 200;

    // First assignment lets the anchor parse the URL so its fragment is
    // known; the second stores the URL with that fragment removed, so
    // "page#a" and "page#b" count as the same page.
    a.href = url;
    a.href = url.replace(a.hash, "");

    // Snapshot the normalised URL: the recursive calls below reuse the
    // shared anchor and overwrite its href.
    var pageUrl = a.href;

    if (a.hostname !== hostname) {
        WScript.Echo("Skipping: " + pageUrl);
        return results;
    }
    if (visited[pageUrl]) {
        WScript.Echo("Already Visited: " + pageUrl);
        return results;
    }

    WScript.Echo("Parsing: " + pageUrl);
    // Marked before loading so that pages linking back here are not re-crawled.
    visited[pageUrl] = true;

    try {
        WScript.Echo("Waiting");
        WScript.Sleep(delay);

        var document = WScript.GetObject(pageUrl),
            waited = 0;
        while (document.readyState !== "complete") {
            if (waited >= MAX_LOAD_WAIT) {
                throw new Error("Timed out waiting for the page to load");
            }
            WScript.Sleep(POLL_INTERVAL);
            waited += POLL_INTERVAL;
        }

        var emails = document.documentElement.innerHTML.match(email) || [];
        WScript.Echo(emails.length + " addresses found" );
        for (var j = 0; j < emails.length; j++) {
            results.push(emails[j]);
        }

        var links = document.getElementsByTagName("a");
        WScript.Echo(links.length + " links on page");
        for (var i = 0, len = links.length; i < len; i++) {
            getAddresses(links[i].href, results);
        }
    } catch (e) {
        WScript.Echo("Unable to parse: " + url);
        WScript.Echo(e.message);
    }

    // Returned after the try/catch rather than from a finally clause, so an
    // error raised inside the handler itself is not silently discarded.
    return results;
}
// Entry point: crawl outward from the start page, then print every distinct
// address that was collected, one per line.
var collected = getAddresses(url, []);
var res = collected.undupe();
var summary = "done. " + res.length + " total addresses found:";
WScript.Echo(summary);
WScript.Echo("-------------------------");
WScript.Echo(res.join("\n"));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment