Skip to content

Instantly share code, notes, and snippets.

@mikeal
Created March 26, 2011 23:31
Show Gist options
  • Save mikeal/888736 to your computer and use it in GitHub Desktop.
Save mikeal/888736 to your computer and use it in GitHub Desktop.
/**
 * In-memory cache of HTTP responses, keyed by URL.
 *
 * Each entry keeps the response headers as given and the body as a Buffer.
 */
function MemoryCache () {
// Prototype-less map: URLs such as "constructor" or "__proto__" are plain
// keys here instead of resolving to members inherited from Object.prototype.
this.cache = Object.create(null);
}

/**
 * Look up the cached response for a URL.
 *
 * @param {string} url Cache key.
 * @returns {{headers: *, body: string}|null} The stored headers and the body
 *   decoded with Buffer#toString(), or null when nothing is cached for `url`.
 */
MemoryCache.prototype.get = function (url) {
var entry = this.cache[url];
if (!entry) return null;
return {headers:entry.headers, body:entry.body.toString()};
};

/**
 * Store (or overwrite) the response for a URL.
 *
 * @param {string} url Cache key.
 * @param {*} headers Response headers, stored by reference.
 * @param {string|Buffer} body Response body; null/undefined is stored as empty.
 */
MemoryCache.prototype.set = function (url, headers, body) {
// Buffer.from replaces the deprecated `new Buffer(...)`, which allocates
// uninitialised memory when handed a number.
this.cache[url] = {headers:headers, body:Buffer.from(body == null ? '' : body)};
};

/**
 * Look up only the cached headers for a URL.
 *
 * @param {string} url Cache key.
 * @returns {*} The stored headers, or null when nothing is cached for `url`.
 */
MemoryCache.prototype.getHeaders = function (url) {
var entry = this.cache[url];
return entry ? entry.headers : null;
};
/**
 * Fetches URLs through `request`, revalidating against a response cache.
 *
 * The resolved settings are kept on `this.options` and the ones that
 * Spider.prototype.get reads (`userAgent`, `cache`, `pool`) are also exposed
 * directly on the instance.
 *
 * @param {Object} [options]
 * @param {number} [options.maxSockets=4] Socket limit used for the default pool.
 * @param {string} [options.userAgent] User-Agent header value; falls back to
 *   `firefox`, which is defined outside this block.
 * @param {Object} [options.cache] Object with get/set/getHeaders methods;
 *   falls back to a new MemoryCache.
 * @param {Object} [options.pool] Pool object passed to `request`; falls back
 *   to `{maxSockets: <maxSockets>}`.
 */
function Spider (options) {
// Work on a shallow copy so defaults are not written into the caller's
// object; a missing `options` argument is treated as `{}`.
var source = options || {};
var settings = {};
Object.keys(source).forEach(function (key) {
settings[key] = source[key];
});
settings.maxSockets = settings.maxSockets || 4;
settings.userAgent = settings.userAgent || firefox;
settings.cache = settings.cache || new MemoryCache();
settings.pool = settings.pool || {maxSockets: settings.maxSockets};
this.options = settings;
// Spider.prototype.get reads these straight off the instance.
this.userAgent = settings.userAgent;
this.cache = settings.cache;
this.pool = settings.pool;
this.currentUrl = null;
this.routers = {};
}

/**
 * Fetch `url`, sending conditional headers when the cache holds validators
 * for it, and hand the result to `this._handler(url, page)`.
 *
 * `page` is `{fromCache, headers, body}`: on a 304 response it is filled from
 * the cache, otherwise the fresh response is cached and passed through.
 * `copy`, `headers`, `request` and `_handler` are defined outside this block.
 *
 * @param {string} url URL to fetch.
 * @throws {Error} Re-throws the error reported by `request` from inside the
 *   request callback.
 */
Spider.prototype.get = function (url) {
var self = this
, h = copy(headers)
;
if (this.currentUrl) h.referer = this.currentUrl;
h['user-agent'] = this.userAgent;
var c = this.cache.getHeaders(url);
if (c) {
// Send the cached validators so the server can answer 304 Not Modified.
if (c['last-modified']) {
h['if-modified-since'] = c['last-modified'];
}
if (c.etag) {
h['if-none-match'] = c.etag;
}
}
request.get({url:url, headers:h, pool:this.pool}, function (e, resp, body) {
// Without this check a failed request dies on `resp.statusCode` with an
// unrelated TypeError; surface the real error instead.
if (e) throw e;
if (resp.statusCode === 304) {
var c_ = self.cache.get(url);
self._handler(url, {fromCache:true, headers:c_.headers, body:c_.body});
return;
}
self.cache.set(url, resp.headers, body);
self._handler(url, {fromCache:false, headers:resp.headers, body:body});
});
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment