Created
October 4, 2013 21:49
-
-
Save haveaguess/6833379 to your computer and use it in GitHub Desktop.
sdf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var log = require("./lib/debug.js");
var Crawler = require("crawler").Crawler;

// Shared crawler instance; queueToDepth() below queues its requests on it.
var c = new Crawler({
    "maxConnections": 10,

    /**
     * Global callback.
     * This will be called for each crawled page that does not supply its own
     * callback.
     *
     * @param {*} error  Truthy when the request failed.
     * @param {*} result Response object for the crawled page.
     * @param {Function} $ jQuery instance scoped to the server-side DOM of the page.
     */
    "callback": function (error, result, $) {
        // Without this guard a failed request (no DOM, so no usable $) would
        // throw a TypeError from inside the crawler's callback.
        if (error || typeof $ !== "function") {
            log.error("err", error);
            return;
        }

        // Print the target of every anchor found on the page.
        $("a").each(function (index, a) {
            // c.queue(a.href);
            console.log(a.href);
        });
    }
});
// Queue just one URL, with default callback
// c.queue("http://codinginmysleep.com");
// Queue a list of URLs
// c.queue(["http://jamendo.com/","http://tedxparis.com"]);
// Queue URLs with custom callbacks & parameters
/**
 * Queue `uri` on the shared crawler `c` and, while `depth` allows, follow the
 * links found on each fetched page.
 *
 * `callback(depth, error, result)` is invoked once per fetched page, where
 * `depth` is the remaining depth at which that page was queued.
 *
 * Links are only followed when `depth > 1`, the request succeeded, and the
 * crawler handed a jQuery-like `$` to the per-request callback as its third
 * argument (NOTE(review): this mirrors the global callback's signature;
 * confirm against the installed crawler version).
 *
 * Discovered URLs are not de-duplicated here, so a page linked from several
 * places may be fetched more than once; the crawl is bounded by `depth` only.
 *
 * @param {string} uri        Page to fetch.
 * @param {number} depth      Remaining levels to crawl, including this page.
 * @param {Function} callback Receives (depth, error, result) for every page.
 */
function queueToDepth(uri, depth, callback) {
    var crawl = {
        "uri": uri,
        "jQuery": true,
        // The global callback won't be called
        "callback": function (error, result, $) {
            // Report this page first, whether it succeeded or failed.
            callback(depth, error, result);

            // Stop at the last level, and never descend from a failed page:
            // there is no DOM to read links from.
            if (depth <= 1 || error || typeof $ !== "function") {
                return;
            }

            // Descend into the page's links instead of re-queuing `uri` itself.
            $("a").each(function (index, a) {
                var href = a && a.href;
                // Skip anchors without a target and non-web schemes (mailto:, javascript:, ...).
                if (typeof href === "string" && /^https?:\/\//i.test(href)) {
                    queueToDepth(href, depth - 1, callback);
                }
            });
        }
    };
    c.queue([crawl]);
}
/**
 * Crawl the hard-coded start URL to `urlDepth` levels via queueToDepth() and
 * report each page: failures go to `log.error`, successes print the page's
 * level (0 for the start page) and body size to the console.
 */
function main() {
    var urlDepth = 1;
    var uri = "http://www.longshanks-consulting.com/";

    queueToDepth(uri, urlDepth, function (depth, error, result) {
        if (error) {
            log.error("rer", result);
            log.error("err", error);
            log.error("depth", depth);
            // Note: this is the start URL, not necessarily the page that failed.
            log.error("uri", uri);
            return;
        }

        // A response may arrive without a body; report 0 instead of throwing.
        var body = result ? result.body : undefined;
        var size = 0;
        if (typeof body === "string") {
            // String length counts UTF-16 code units; the message says bytes.
            size = Buffer.byteLength(body);
        } else if (body && typeof body.length === "number") {
            size = body.length;
        }

        // urlDepth - depth turns the remaining depth into a level counted from the start page.
        console.log("Grabbed (depth " + (urlDepth - depth) + ") ", size, "bytes");
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment