Skip to content

Instantly share code, notes, and snippets.

@rcastillo
Created December 10, 2011 17:23
Show Gist options
  • Save rcastillo/1455661 to your computer and use it in GitHub Desktop.
Save rcastillo/1455661 to your computer and use it in GitHub Desktop.
Crawl Loop: sitecrawl
/**
 * Runs the crawl loop and forwards its results to `obs`.
 *
 * Must be called with `this` bound to the crawler object. The fields read
 * from it are:
 *   - `crawlQueue`: array of pending crawl steps; the next step is taken
 *     with `pop()`, i.e. from the END of the array. Each step must expose
 *     a `Delay(ms)` method (presumably an Rx observable; confirm at the
 *     call sites that fill the queue).
 *   - `delay`: passed to `step.Delay(...)` before the step is crawled.
 *   - `maxCrawlTime`: how long the idle branch waits before emitting its
 *     sentinel when the queue is empty.
 *   - `selectForCrawlLinks(step)`: must return the observable whose values
 *     are forwarded to `obs.OnNext`.
 * `this.crawling` is set to `true` on entry and reset to `false` as soon as
 * the run terminates (completion or error). Clearing it from outside stops
 * the loop at its next iteration.
 *
 * Termination is signalled to `obs` exactly once:
 *   - `OnCompleted()` when an idle wait of `maxCrawlTime` elapses and the
 *     queue is still empty, or when the loop ends because `crawling` was
 *     cleared externally;
 *   - `OnError(exn)` when the underlying sequence fails.
 * Nothing is forwarded to `obs` after that.
 *
 * @param {{OnNext: Function, OnError: Function, OnCompleted: Function}} obs
 *     Observer receiving crawl results and the terminal notification.
 * @returns {*} Whatever `Subscribe` returns (in RxJS this is the
 *     subscription handle, which lets the caller cancel the crawl).
 */
function runCrawler(obs) {
  var self = this;

  // Sentinel emitted by the idle branch; it is never forwarded to `obs`.
  // NOTE(review): a genuine crawl result equal to this string would be
  // mistaken for an idle tick.
  var IDLE = 'nop';

  // Guards against signalling termination more than once.
  var finished = false;

  // Marks the run as over, stops the While loop, then notifies `obs`.
  function finish(notify) {
    if (finished) {
      return;
    }
    finished = true;
    // Without this the While condition stays true forever and the idle
    // branch would call OnCompleted again on every subsequent tick.
    self.crawling = false;
    notify();
  }

  self.crawling = true;

  return Rx.Observable.While(
    function () {
      return self.crawling;
    },
    Rx.Observable.If(
      function () {
        return self.crawlQueue.length > 0;
      },
      // Deferred so the queue is popped at subscription time, i.e. once per
      // loop iteration rather than once when the pipeline is built.
      Rx.Observable.Defer(function () {
        var nextCrawlStep = self.crawlQueue.pop();
        return self.selectForCrawlLinks(nextCrawlStep.Delay(self.delay));
      }),
      Rx.Observable.Return(IDLE).Delay(self.maxCrawlTime)
    )
  ).Subscribe(
    function (crawlResult) {
      if (finished) {
        return;
      }
      if (crawlResult !== IDLE) {
        obs.OnNext(crawlResult);
        return;
      }
      if (self.crawlQueue.length === 0) {
        // We have waited maxCrawlTime for new pages and the queue is
        // still empty: signal complete.
        finish(function () {
          obs.OnCompleted();
        });
      }
    },
    function (exn) {
      finish(function () {
        obs.OnError(exn);
      });
    },
    function () {
      // The While loop ended on its own (crawling was cleared). If that was
      // an external stop, `obs` has not been told yet.
      finish(function () {
        obs.OnCompleted();
      });
    }
  );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment